Add diskcheck-daemon.
With a small disk being 20GB these days, chances are pretty good that an ailing sector will not be read while still being recoverable by the drive. Diskcheck daemon will read disks in the background at a low rate and that way give the diskdrive a chance to detect and correct soft read errors before they become hard errors. Idea by: phk Written by: ben
This commit is contained in:
parent
2d754db7e6
commit
849f35cf88
18
etc/diskcheckd.conf
Normal file
18
etc/diskcheckd.conf
Normal file
@ -0,0 +1,18 @@
|
||||
# $FreeBSD$
|
||||
#
|
||||
# Configuration file for diskcheckd, refer to the diskcheckd.conf(5) manual
|
||||
# page for further information.
|
||||
#
|
||||
# The device name may be specified as "*", in which case all devices are
|
||||
# checked.
|
||||
#
|
||||
# The size may be specified as "*", in which case the size is automatically
|
||||
# determined from the disklabel.
|
||||
#
|
||||
# One of either "Days" or "Rate" must be specified, to specify a complete
|
||||
# scan should happen once every n days, or that scanning should take place
|
||||
# at n KB/s. Note that the "Days" value will not cause a scan exactly every
|
||||
# n days since diskcheckd will always read in power-of-2 size blocks.
|
||||
#
|
||||
# Device Size Days Rate
|
||||
* * 28 *
|
@ -1518,6 +1518,20 @@ is set to
|
||||
these are the flags to pass to the
|
||||
.Xr sendmail 8
|
||||
daemon.
|
||||
.It Ar diskcheckd_enable
|
||||
(bool) Set to
|
||||
.Ar YES
|
||||
if you want to start
|
||||
.Xr diskcheckd 8
|
||||
at system boot time.
|
||||
.It Ar diskcheckd_flags
|
||||
If
|
||||
.Ar diskcheckd_enable
|
||||
is set to
|
||||
.Ar YES ,
|
||||
these are the flags to pass to the
|
||||
.Xr diskcheckd 8
|
||||
daemon.
|
||||
.It Va dumpdev
|
||||
.Pq Vt str
|
||||
If not set to
|
||||
|
@ -24,6 +24,7 @@ SUBDIR= IPXrouted \
|
||||
dev_mkdb \
|
||||
devinfo \
|
||||
digictl \
|
||||
diskcheckd \
|
||||
diskpart \
|
||||
edquota \
|
||||
extattrctl \
|
||||
|
12
usr.sbin/diskcheckd/Makefile
Normal file
12
usr.sbin/diskcheckd/Makefile
Normal file
@ -0,0 +1,12 @@
|
||||
# $FreeBSD$
|
||||
|
||||
CFLAGS+= -W -Wall -g -O0
|
||||
|
||||
PROG= diskcheckd
|
||||
MAN8= diskcheckd.8
|
||||
|
||||
MLINKS= diskcheckd.8 diskcheckd.conf.5
|
||||
|
||||
BINDIR?= /usr/sbin
|
||||
|
||||
.include <bsd.prog.mk>
|
186
usr.sbin/diskcheckd/diskcheckd.8
Normal file
186
usr.sbin/diskcheckd/diskcheckd.8
Normal file
@ -0,0 +1,186 @@
|
||||
.\" Copyright (c) 2000, 2001 Ben Smithurst <ben@FreeBSD.org>
|
||||
.\" All rights reserved.
|
||||
.\"
|
||||
.\" Redistribution and use in source and binary forms, with or without
|
||||
.\" modification, are permitted provided that the following conditions
|
||||
.\" are met:
|
||||
.\" 1. Redistributions of source code must retain the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer.
|
||||
.\" 2. Redistributions in binary form must reproduce the above copyright
|
||||
.\" notice, this list of conditions and the following disclaimer in the
|
||||
.\" documentation and/or other materials provided with the distribution.
|
||||
.\"
|
||||
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
.\" SUCH DAMAGE.
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd December 30, 2000
|
||||
.Dt DISKCHECKD 8
|
||||
.Os FreeBSD
|
||||
.Sh NAME
|
||||
.Nm diskcheckd
|
||||
.Nd daemon to check for disk read errors
|
||||
.Sh SYNOPSIS
|
||||
.Nm
|
||||
.Op Fl d
|
||||
.Op Fl f Ar conf_file
|
||||
.Op Fl o Ar save_file
|
||||
.Sh DESCRIPTION
|
||||
.Nm
|
||||
is a daemon which runs in the background,
|
||||
reading entire disks to find any read errors on those disks.
|
||||
The disks which should be scanned,
|
||||
and the rates at which they should be scanned,
|
||||
must be specified in the configuration file,
|
||||
which is
|
||||
.Pa /etc/diskcheckd.conf
|
||||
by default.
|
||||
.Pp
|
||||
Any blank lines or lines starting with a
|
||||
.Ql #
|
||||
character in this file are ignored.
|
||||
Each non-comment line of this file should contain four white space separated
|
||||
fields,
|
||||
which are the full pathname of the disk device,
|
||||
the size of that disk,
|
||||
the frequency in days at which to check that disk,
|
||||
and the rate in kilobytes per second at which to check this disk.
|
||||
Naturally,
|
||||
it would be contradictory to specify both the frequency and the rate,
|
||||
so only one of these should be specified.
|
||||
Additionally,
|
||||
the size of the disk should not be specified if the rate is specified,
|
||||
as this information is unnecessary.
|
||||
.Pp
|
||||
If the disk is specified as
|
||||
.Dq * ,
|
||||
then
|
||||
.Nm
|
||||
will apply the given settings to all disks in the system,
|
||||
obtained using the
|
||||
.Va kern.disks
|
||||
sysctl variable.
|
||||
If the size is specified as
|
||||
.Dq *
|
||||
(recommended),
|
||||
then the size of the disk will be automatically determined from the
|
||||
disklabel,
|
||||
if possible.
|
||||
Fields which are not specified should contain a single
|
||||
.Dq *
|
||||
character.
|
||||
.Pp
|
||||
Note that any rate specified will be rounded to the nearest power of 2,
|
||||
and will be forced into the range 512 bytes to 128 kilobytes,
|
||||
inclusive.
|
||||
Any rate calculated from the frequency and size information will be rounded
|
||||
in the same way,
|
||||
so if you specify the third field (days for complete scan) it is unlikely
|
||||
that a complete scan will actually take exactly this many days.
|
||||
.Pp
|
||||
To run
|
||||
.Nm
|
||||
automatically at boot time,
|
||||
the
|
||||
.Ar diskcheckd_enable
|
||||
variable in
|
||||
.Xr rc.conf 5
|
||||
should be set to
|
||||
.Dq YES .
|
||||
.Pp
|
||||
When
|
||||
.Nm
|
||||
receives a
|
||||
.Dv SIGTERM
|
||||
or
|
||||
.Dv SIGINT
|
||||
signal,
|
||||
it saves its current state information to a file,
|
||||
so that after a reboot
|
||||
.Nm
|
||||
can resume reading from where it left off,
|
||||
rather than starting from the beginning of the disk again.
|
||||
The information saved to this file consists of the device filename and the
|
||||
current offset into that device.
|
||||
.Pp
|
||||
.Nm
|
||||
can be instructed to reload the configuration file by sending it a
|
||||
.Dv SIGHUP
|
||||
signal.
|
||||
.Pp
|
||||
.Nm
|
||||
accepts the following command line options:
|
||||
.Pp
|
||||
.Bl -tag -width Fl
|
||||
.It Fl d
|
||||
If this flag is specified,
|
||||
.Nm
|
||||
will not fork into the background and detach from its controlling terminal
|
||||
to become a daemon.
|
||||
This flag is primarily used for debugging.
|
||||
.It Fl f
|
||||
Specify the configuration file to use,
|
||||
instead of the default
|
||||
.Pa /etc/diskcheckd.conf .
|
||||
.It Fl o
|
||||
Specify the file to save disk offsets to,
|
||||
instead of the default
|
||||
.Pa /var/db/diskcheckd.offsets .
|
||||
.El
|
||||
.Sh FILES
|
||||
.Bl -tag -width /var/db/diskcheckd.offsets -compact
|
||||
.It Pa /etc/diskcheckd.conf
|
||||
Default configuration file.
|
||||
.It Pa /var/db/diskcheckd.offsets
|
||||
Default location of saved offsets.
|
||||
.El
|
||||
.Sh EXAMPLES
|
||||
To check all of
|
||||
.Pa /dev/ad0
|
||||
for errors once every two weeks,
|
||||
use this entry in
|
||||
.Pa diskcheckd.conf :
|
||||
.Pp
|
||||
.Bd -literal -offset indent
|
||||
/dev/ad0 * 14 *
|
||||
.Ed
|
||||
.Pp
|
||||
To check the first SCSI disk for errors at approximately 64KB/s,
|
||||
use the following entry:
|
||||
.Pp
|
||||
.Bd -literal -offset indent
|
||||
/dev/da0 * * 64
|
||||
.Ed
|
||||
.Pp
|
||||
To check all disks once every four weeks:
|
||||
.Pp
|
||||
.Bd -literal -offset indent
|
||||
* * 28 *
|
||||
.Ed
|
||||
.Sh DIAGNOSTICS
|
||||
If any errors occur,
|
||||
they will be written to
|
||||
.Xr syslogd 8 .
|
||||
.Sh HISTORY
|
||||
.Nm
|
||||
first appeared in
|
||||
.Fx 5.0 .
|
||||
.Sh AUTHORS
|
||||
.Nm
|
||||
and this manual page were written by
|
||||
.An Ben Smithurst Aq ben@FreeBSD.org ,
|
||||
with input from
|
||||
.An Poul-Henning Kamp Aq phk@FreeBSD.org .
|
||||
.Sh BUGS
|
||||
.Nm
|
||||
assumes all disks have 512 byte sectors.
|
815
usr.sbin/diskcheckd/diskcheckd.c
Normal file
815
usr.sbin/diskcheckd/diskcheckd.c
Normal file
@ -0,0 +1,815 @@
|
||||
/*-
|
||||
* Copyright (c) 2000, 2001 Ben Smithurst <ben@FreeBSD.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
static const char rcsid[] =
|
||||
"$FreeBSD$";
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <err.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <paths.h>
|
||||
#include <signal.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <syslog.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define DKTYPENAMES
|
||||
#include <sys/disklabel.h>
|
||||
|
||||
#define _PATH_CONF "/etc/diskcheckd.conf"
|
||||
#define _PATH_SAVE _PATH_VARDB"diskcheckd.offsets"
|
||||
|
||||
#define MAXRATE (128 << 10)
|
||||
#define MINRATE 512
|
||||
|
||||
struct disk {
|
||||
int fd;
|
||||
char *device;
|
||||
off_t size;
|
||||
int secsize;
|
||||
int days, rate, errors;
|
||||
};
|
||||
|
||||
volatile sig_atomic_t got_sighup = 0, got_sigterm = 0;
|
||||
|
||||
char **getdisknames(void);
|
||||
off_t dseek(struct disk *, off_t, int);
|
||||
struct disk *readconf(const char *);
|
||||
void getdisksize(struct disk *);
|
||||
void logreaderror(struct disk *, int);
|
||||
void readchunk(struct disk *, char *);
|
||||
void readoffsets(struct disk *, const char *);
|
||||
void setdiskrate(struct disk *);
|
||||
void sighup(int);
|
||||
void sigterm(int);
|
||||
void updateproctitle(struct disk *);
|
||||
void usage(void);
|
||||
void writeoffsets(struct disk *, const char *);
|
||||
|
||||
int
|
||||
main(int argc, char *argv[]) {
|
||||
char *buf;
|
||||
struct disk *disks, *dp;
|
||||
int ch, ok;
|
||||
struct sigaction sa;
|
||||
int counter, debug;
|
||||
const char *conf_file, *save_file;
|
||||
|
||||
conf_file = _PATH_CONF;
|
||||
save_file = _PATH_SAVE;
|
||||
debug = 0;
|
||||
|
||||
while ((ch = getopt(argc, argv, "df:o:")) != -1)
|
||||
switch (ch) {
|
||||
case 'd':
|
||||
debug = 1;
|
||||
break;
|
||||
case 'f':
|
||||
conf_file = optarg;
|
||||
break;
|
||||
case 'o':
|
||||
save_file = optarg;
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
/* NOTREACHED */
|
||||
}
|
||||
|
||||
argv += optind;
|
||||
argc -= optind;
|
||||
|
||||
if (argc != 0)
|
||||
usage();
|
||||
|
||||
openlog("diskcheckd", LOG_CONS|LOG_PID, LOG_DAEMON);
|
||||
|
||||
if (!debug && daemon(0, 0) < 0) {
|
||||
syslog(LOG_NOTICE, "daemon failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
sa.sa_handler = sigterm;
|
||||
sa.sa_flags = SA_RESTART;
|
||||
sigemptyset(&sa.sa_mask);
|
||||
sigaction(SIGTERM, &sa, NULL);
|
||||
sigaction(SIGINT, &sa, NULL);
|
||||
|
||||
sa.sa_handler = sighup;
|
||||
sigaction(SIGHUP, &sa, NULL);
|
||||
|
||||
/* Read the configuration file and the saved offsets */
|
||||
disks = readconf(conf_file);
|
||||
readoffsets(disks, save_file);
|
||||
|
||||
if ((buf = malloc(MAXRATE)) == NULL) {
|
||||
syslog(LOG_NOTICE, "malloc failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* The main disk checking loop. */
|
||||
counter = 0;
|
||||
while (!got_sigterm) {
|
||||
ok = 0;
|
||||
for (dp = disks; dp->device != NULL; dp++)
|
||||
if (dp->fd != -1) {
|
||||
ok = 1;
|
||||
readchunk(dp, buf);
|
||||
}
|
||||
|
||||
if (!ok) {
|
||||
syslog(LOG_EMERG, "all disks had read errors");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
if (counter % 300 == 0) {
|
||||
updateproctitle(disks);
|
||||
writeoffsets(disks, save_file);
|
||||
}
|
||||
|
||||
counter++;
|
||||
sleep(1);
|
||||
|
||||
if (got_sighup) {
|
||||
/*
|
||||
* Got a SIGHUP, so save the offsets, free the
|
||||
* memory used for the disk structures, and then
|
||||
* re-read the config file and the disk offsets.
|
||||
*/
|
||||
writeoffsets(disks, save_file);
|
||||
for (dp = disks; dp->device != NULL; dp++) {
|
||||
free(dp->device);
|
||||
close(dp->fd);
|
||||
}
|
||||
free(disks);
|
||||
disks = readconf(conf_file);
|
||||
readoffsets(disks, save_file);
|
||||
got_sighup = 0;
|
||||
}
|
||||
}
|
||||
|
||||
writeoffsets(disks, save_file);
|
||||
return (EXIT_SUCCESS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the next chunk from the specified disk, retrying if necessary.
|
||||
*/
|
||||
void
|
||||
readchunk(struct disk *dp, char *buf) {
|
||||
ssize_t n;
|
||||
int s;
|
||||
|
||||
n = read(dp->fd, buf, dp->rate);
|
||||
if (n == 0) {
|
||||
eof:
|
||||
syslog(LOG_INFO, "reached end of %s with %d errors",
|
||||
dp->device, dp->errors);
|
||||
dseek(dp, 0, SEEK_SET);
|
||||
dp->errors = 0;
|
||||
return;
|
||||
} else if (n > 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Read error, retry in smaller chunks.
|
||||
*/
|
||||
logreaderror(dp, dp->rate);
|
||||
|
||||
for (s = 0; s < dp->rate; s += 512) {
|
||||
n = read(dp->fd, buf, 512);
|
||||
if (n == 0)
|
||||
goto eof;
|
||||
else if (n < 0) {
|
||||
/* log the error and seek past it. */
|
||||
logreaderror(dp, 512);
|
||||
dseek(dp, 512, SEEK_CUR);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const char *
|
||||
fstypename(u_int8_t type) {
|
||||
static char buf[32];
|
||||
|
||||
if (type < FSMAXTYPES)
|
||||
return (fstypenames[type]);
|
||||
else {
|
||||
snprintf(buf, sizeof buf, "%u", type);
|
||||
return (buf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Report a read error, logging how many bytes were trying to be read, which
|
||||
* sector they were being read from, and try to also find out what that sector
|
||||
* is used for.
|
||||
*/
|
||||
void
|
||||
logreaderror(struct disk *dp, int nbytes) {
|
||||
quad_t secno;
|
||||
off_t saved_offset;
|
||||
int fd, slice, part;
|
||||
struct dos_partition *dos;
|
||||
struct disklabel label;
|
||||
char buf[512];
|
||||
char newdev[512];
|
||||
|
||||
saved_offset = dseek(dp, 0, SEEK_CUR);
|
||||
secno = (quad_t)saved_offset / dp->secsize;
|
||||
dp->errors++;
|
||||
|
||||
syslog(LOG_NOTICE, "error reading %d bytes from sector %qd on %s",
|
||||
nbytes, secno, dp->device);
|
||||
|
||||
/*
|
||||
* First, find out which slice it's in. To do this, we seek to the
|
||||
* start of the disk, read the first sector, and go through the DOS
|
||||
* slice table.
|
||||
*/
|
||||
if (dseek(dp, 0, SEEK_SET) == -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
if (read(dp->fd, buf, sizeof buf) != sizeof buf) {
|
||||
dseek(dp, saved_offset, SEEK_SET);
|
||||
return;
|
||||
}
|
||||
|
||||
/* seek back to where we were */
|
||||
if (dseek(dp, saved_offset, SEEK_SET) == -1)
|
||||
exit(EXIT_FAILURE);
|
||||
|
||||
dos = (struct dos_partition *)&buf[DOSPARTOFF];
|
||||
for (slice = 0; slice < NDOSPART; slice++)
|
||||
if (dos[slice].dp_start <= secno &&
|
||||
secno < dos[slice].dp_start + dos[slice].dp_size)
|
||||
break;
|
||||
|
||||
if (slice == NDOSPART) {
|
||||
syslog(LOG_NOTICE,
|
||||
"sector %qd on %s doesn't appear "
|
||||
"to be within any DOS slice", secno, dp->device);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Make secno relative to this slice */
|
||||
secno -= dos[slice].dp_start;
|
||||
|
||||
snprintf(newdev, sizeof newdev, "%ss%d", dp->device, slice + 1);
|
||||
syslog(LOG_DEBUG, "bad sector seems to be within %s", newdev);
|
||||
|
||||
/* Check the type of that partition. */
|
||||
if (dos[slice].dp_typ != DOSPTYP_386BSD) {
|
||||
/* If not a BSD slice, we can't do much more. */
|
||||
syslog(LOG_NOTICE, "last bad sector is sector %qd "
|
||||
"on device %s, type %02x", secno, newdev,
|
||||
dos[slice].dp_typ);
|
||||
return;
|
||||
}
|
||||
|
||||
if ((fd = open(newdev, O_RDONLY)) < 0) {
|
||||
syslog(LOG_NOTICE, "open %s failure: %m", newdev);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Try to read the disklabel from that device. */
|
||||
if (ioctl(fd, DIOCGDINFO, &label) < 0) {
|
||||
syslog(LOG_NOTICE, "DIOCGDINFO on %s failed: %m",
|
||||
newdev);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check which partition this sector is in. */
|
||||
for (part = 0; part < MAXPARTITIONS; part++)
|
||||
if (part != 2 && /* skip 'c' partition */
|
||||
label.d_partitions[part].p_offset <= secno &&
|
||||
secno < label.d_partitions[part].p_offset +
|
||||
label.d_partitions[part].p_size)
|
||||
break;
|
||||
|
||||
if (part == MAXPARTITIONS) {
|
||||
syslog(LOG_NOTICE,
|
||||
"sector %qd on %s doesn't appear "
|
||||
"to be within any BSD partition", secno, newdev);
|
||||
return;
|
||||
}
|
||||
|
||||
secno -= label.d_partitions[part].p_offset;
|
||||
snprintf(newdev, sizeof newdev, "%ss%d%c",
|
||||
dp->device, slice + 1, 'a' + part);
|
||||
syslog(LOG_DEBUG, "bad sector seems to be within %s", newdev);
|
||||
if (label.d_partitions[part].p_fstype != FS_BSDFFS) {
|
||||
/* Again, if not a BSD partition, can't do much. */
|
||||
syslog(LOG_NOTICE, "last bad sector is sector %qd "
|
||||
"on device %s, type %s", secno, newdev,
|
||||
fstypename(label.d_partitions[part].p_fstype));
|
||||
return;
|
||||
}
|
||||
|
||||
syslog(LOG_NOTICE, "last bad sector is sector %qd "
|
||||
"on 4.2BSD filesystem %s", secno, newdev);
|
||||
|
||||
/*
|
||||
* XXX: todo: find out which file on the BSD filesystem uses this
|
||||
* sector...
|
||||
*/
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the offsets written by writeoffsets().
|
||||
*/
|
||||
void
|
||||
readoffsets(struct disk *disks, const char *save_file) {
|
||||
FILE *fp;
|
||||
struct disk *dp;
|
||||
char *space, buf[1024];
|
||||
|
||||
if ((fp = fopen(save_file, "r")) == NULL) {
|
||||
if (errno != ENOENT)
|
||||
syslog(LOG_NOTICE, "open %s failed: %m", save_file);
|
||||
return;
|
||||
}
|
||||
|
||||
while (fgets(buf, sizeof buf, fp) != NULL) {
|
||||
if ((space = strchr(buf, ' ')) == NULL)
|
||||
continue;
|
||||
*space = '\0';
|
||||
|
||||
for (dp = disks;
|
||||
dp->device != NULL && strcmp(dp->device, buf) != 0; dp++)
|
||||
; /* nothing */
|
||||
|
||||
if (dp->device != NULL)
|
||||
dseek(dp, (off_t)strtoq(space + 1, NULL, 0), SEEK_SET);
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Save the offsets we've reached for each disk to a file, so we can start
|
||||
* at that position next time the daemon is started.
|
||||
*/
|
||||
void
|
||||
writeoffsets(struct disk *disks, const char *save_file) {
|
||||
FILE *fp;
|
||||
struct disk *dp;
|
||||
|
||||
if ((fp = fopen(save_file, "w")) == NULL) {
|
||||
syslog(LOG_NOTICE, "open %s failed: %m", save_file);
|
||||
return;
|
||||
}
|
||||
|
||||
for (dp = disks; dp->device != NULL; dp++)
|
||||
if (strcmp(dp->device, "*") != 0)
|
||||
fprintf(fp, "%s %qd\n", dp->device,
|
||||
(quad_t)dseek(dp, 0, SEEK_CUR));
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the process title so it's easy to see using ps(1) how much has been
|
||||
* done.
|
||||
*/
|
||||
void
|
||||
updateproctitle(struct disk *disks) {
|
||||
struct disk *dp;
|
||||
char *bp, *p;
|
||||
static char *buf;
|
||||
static size_t bufsize;
|
||||
size_t size;
|
||||
int inc, ret;
|
||||
double percent;
|
||||
|
||||
bp = buf;
|
||||
size = bufsize;
|
||||
for (dp = disks; dp->device != NULL; dp++) {
|
||||
p = dp->device;
|
||||
if (strcmp(p, "*") == 0)
|
||||
continue;
|
||||
if (strncmp(p, _PATH_DEV, sizeof _PATH_DEV - 1) == 0)
|
||||
p += sizeof _PATH_DEV - 1;
|
||||
|
||||
percent = 100 * (double)dseek(dp, 0, SEEK_CUR) / dp->size;
|
||||
if ((size_t)(ret = snprintf(bp, size,
|
||||
"%s %.2f%%, ", p, percent)) >= size) {
|
||||
inc = ((ret + 1023) >> 10) << 10;
|
||||
size += inc;
|
||||
bufsize += inc;
|
||||
if ((buf = reallocf(buf, bufsize)) == NULL) {
|
||||
/* Not fatal. */
|
||||
syslog(LOG_NOTICE, "reallocf failure: %m");
|
||||
bufsize = 0;
|
||||
return;
|
||||
}
|
||||
bp = buf + bufsize - size;
|
||||
ret = snprintf(bp, size, "%s %.2f%%, ", p, percent);
|
||||
}
|
||||
bp += ret;
|
||||
size -= ret;
|
||||
}
|
||||
|
||||
if (buf != NULL) {
|
||||
/* Remove the trailing comma. */
|
||||
if (&bp[-2] >= buf)
|
||||
bp[-2] = '\0';
|
||||
setproctitle("%s", buf);
|
||||
}
|
||||
}
|
||||
|
||||
/* used to keep track of which fields have been specified */
|
||||
#define FL_SIZE_SPEC 1
|
||||
#define FL_DAYS_SPEC 2
|
||||
#define FL_RATE_SPEC 4
|
||||
|
||||
/*
|
||||
* Read the configuration file, set the rate appropriately for each disk,
|
||||
* and get a file descriptor for each disk.
|
||||
*/
|
||||
struct disk *
|
||||
readconf(const char *conf_file) {
|
||||
FILE *fp;
|
||||
char buf[1024], *line, *field, *ep, **np, **np0;
|
||||
int fields, flags;
|
||||
struct disk *disks, *dp, *odisks;
|
||||
int numdisks, onumdisks;
|
||||
double dval;
|
||||
long lval;
|
||||
int linenum;
|
||||
|
||||
if ((fp = fopen(conf_file, "r")) == NULL) {
|
||||
syslog(LOG_NOTICE, "open %s failure: %m", conf_file);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
numdisks = 0;
|
||||
disks = NULL;
|
||||
linenum = 0;
|
||||
|
||||
/* Step 1: read and parse the configuration file. */
|
||||
while (fgets(buf, sizeof buf, fp) != NULL) {
|
||||
line = buf;
|
||||
linenum++;
|
||||
while (isspace(*line))
|
||||
line++;
|
||||
if (*line == '#' || *line == '\n' || *line == '\0')
|
||||
continue;
|
||||
fields = flags = 0;
|
||||
while ((field = strsep(&line, " \t\n")) != NULL) {
|
||||
if (*field == '\0')
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If this is the first field on a line, allocate
|
||||
* space for one more disk structure.
|
||||
*/
|
||||
if (fields == 0 && (disks = reallocf(disks,
|
||||
(numdisks + 1) * sizeof (*disks))) == NULL) {
|
||||
syslog(LOG_NOTICE, "reallocf failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
dp = &disks[numdisks];
|
||||
switch (fields++) {
|
||||
case 0:
|
||||
/* device name */
|
||||
if ((dp->device = strdup(field)) == NULL) {
|
||||
syslog(LOG_NOTICE,
|
||||
"strdup failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
dp->fd = -1;
|
||||
dp->rate = -1;
|
||||
dp->size = -1;
|
||||
break;
|
||||
case 1:
|
||||
/* size */
|
||||
if (strcmp(field, "*") == 0)
|
||||
break;
|
||||
flags |= FL_SIZE_SPEC;
|
||||
dval = strtod(field, &ep);
|
||||
if (dval < 0) {
|
||||
syslog(LOG_NOTICE,
|
||||
"%s:%d: size cannot be negative",
|
||||
conf_file, linenum);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (strcasecmp(ep, "M") == 0)
|
||||
dp->size = dval * 1024 * 1024;
|
||||
else if (strcasecmp(ep, "G") == 0)
|
||||
dp->size = dval * 1024 * 1024 * 1024;
|
||||
else if (*ep == '\0')
|
||||
dp->size = dval;
|
||||
else {
|
||||
syslog(LOG_NOTICE,
|
||||
"%s:%d: bad suffix \"%s\" on "
|
||||
"size \"%s\"", conf_file,
|
||||
linenum, ep, field);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
/* days */
|
||||
if (strcmp(field, "*") == 0)
|
||||
break;
|
||||
flags |= FL_DAYS_SPEC;
|
||||
lval = strtol(field, &ep, 0);
|
||||
if (ep == field || *ep != '\0' ||
|
||||
lval <= 0) {
|
||||
syslog(LOG_NOTICE,
|
||||
"%s:%d: bad number of days",
|
||||
conf_file, linenum);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
dp->days = lval;
|
||||
break;
|
||||
case 3:
|
||||
/* rate */
|
||||
if (strcmp(field, "*") == 0)
|
||||
break;
|
||||
flags |= FL_RATE_SPEC;
|
||||
lval = strtol(field, &ep, 0);
|
||||
if (ep == field || *ep != '\0' ||
|
||||
lval <= 0) {
|
||||
syslog(LOG_NOTICE, "%s:%d: bad rate",
|
||||
conf_file, linenum);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
dp->rate = lval * 1024;
|
||||
break;
|
||||
default:
|
||||
/* Report error at end of line. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (fields != 4) {
|
||||
syslog(LOG_NOTICE,
|
||||
"%s:%d: %d fields, should be 4",
|
||||
conf_file, linenum, fields);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (flags != (FL_SIZE_SPEC|FL_DAYS_SPEC) &&
|
||||
flags != FL_DAYS_SPEC &&
|
||||
flags != FL_RATE_SPEC) {
|
||||
syslog(LOG_NOTICE,
|
||||
"%s:%d: should specify frequency "
|
||||
"or rate, not both", conf_file,
|
||||
linenum);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
numdisks++;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
if (numdisks == 0) {
|
||||
syslog(LOG_NOTICE, "no disks specified");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Step 2: expand any dp->device == "*" entries. onumdisks is used
|
||||
* so that we don't keep checking dp->device for the new disks as
|
||||
* they're added, since that's pointless.
|
||||
*/
|
||||
onumdisks = numdisks;
|
||||
for (dp = disks; dp < disks + onumdisks; dp++) {
|
||||
if (strcmp(dp->device, "*") == 0) {
|
||||
for (np = np0 = getdisknames(); *np != NULL; np++) {
|
||||
odisks = disks;
|
||||
if ((disks = reallocf(disks,
|
||||
(numdisks + 1) * sizeof (*disks))) == NULL) {
|
||||
syslog(LOG_NOTICE,
|
||||
"reallocf failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* "disks" pointer may have changed. */
|
||||
dp = disks + (dp - odisks);
|
||||
|
||||
disks[numdisks].rate = dp->rate;
|
||||
disks[numdisks].size = dp->size;
|
||||
disks[numdisks].days = dp->days;
|
||||
disks[numdisks].device = *np;
|
||||
numdisks++;
|
||||
}
|
||||
|
||||
/* Don't free the individual pointers, since they're
|
||||
* used as new dp->device entries.
|
||||
*/
|
||||
free(np0);
|
||||
}
|
||||
}
|
||||
|
||||
/* Step 3: open all the disks and set the rate to check at
|
||||
* appropriately. Use "onumdisks" because numdisks changes within
|
||||
* the loop.
|
||||
*/
|
||||
onumdisks = numdisks;
|
||||
for (dp = disks; dp < disks + onumdisks; dp++) {
|
||||
if (strcmp(dp->device, "*") == 0) {
|
||||
numdisks--;
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((dp->fd = open(dp->device, O_RDONLY)) < 0) {
|
||||
syslog(LOG_NOTICE, "open %s failed: %m", dp->device);
|
||||
numdisks--;
|
||||
continue;
|
||||
}
|
||||
|
||||
dp->errors = 0;
|
||||
|
||||
/*
|
||||
* Set the rate appropriately. We read in blocks of this
|
||||
* size, so make it a power of 2 as close as possible to the
|
||||
* specified/calculated rate, and make it in the range
|
||||
* MINRATE..MAXRATE.
|
||||
*/
|
||||
if (dp->size < 0)
|
||||
getdisksize(dp);
|
||||
if (dp->rate < 0)
|
||||
dp->rate = dp->size / (dp->days * 86400);
|
||||
setdiskrate(dp);
|
||||
}
|
||||
|
||||
if (numdisks == 0) {
|
||||
syslog(LOG_NOTICE, "no disks usable");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
/* Add a final entry with dp->device == NULL to end the array. */
|
||||
if ((disks = reallocf(disks,
|
||||
(onumdisks + 1) * sizeof (*disks))) == NULL) {
|
||||
syslog(LOG_NOTICE, "reallocf failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
disks[onumdisks].device = NULL;
|
||||
|
||||
return (disks);
|
||||
}
|
||||
|
||||
/*
|
||||
* Read in the specified disk's size from the disklabel.
|
||||
*/
|
||||
void
|
||||
getdisksize(struct disk *dp) {
|
||||
struct disklabel label;
|
||||
|
||||
if (ioctl(dp->fd, DIOCGDINFO, &label) < 0) {
|
||||
syslog(LOG_NOTICE, "DIOCGDINFO on %s failed: %m",
|
||||
dp->device);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
dp->secsize = label.d_secsize;
|
||||
dp->size = (off_t)label.d_secperunit * label.d_secsize;
|
||||
|
||||
if (label.d_secsize != 512)
|
||||
syslog(LOG_NOTICE,
|
||||
"%s has %d byte sectors, may cause minor problems",
|
||||
dp->device, label.d_secsize);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the nearest power of 2 to the calculated or specified rate, limited
|
||||
* to somewhere between MINRATE and MAXRATE.
|
||||
*/
|
||||
void
|
||||
setdiskrate(struct disk *dp) {
|
||||
int s, r;
|
||||
int above, below;
|
||||
|
||||
if (dp->rate <= MINRATE)
|
||||
dp->rate = MINRATE;
|
||||
else if (dp->rate >= MAXRATE)
|
||||
dp->rate = MAXRATE;
|
||||
else {
|
||||
for (r = dp->rate, s = 0; r != 0; s++)
|
||||
r >>= 1;
|
||||
|
||||
/*
|
||||
* "above" and "below" are the closest power-of-2 numbers to the
|
||||
* calculated rate.
|
||||
*/
|
||||
above = 1 << s;
|
||||
below = 1 << (s - 1);
|
||||
if (above - dp->rate < dp->rate - below)
|
||||
dp->rate = above;
|
||||
else
|
||||
dp->rate = below;
|
||||
}
|
||||
}
|
||||
|
||||
off_t
|
||||
dseek(struct disk *dp, off_t offset, int whence) {
|
||||
off_t n;
|
||||
|
||||
if ((n = lseek(dp->fd, offset, whence)) == -1) {
|
||||
syslog(LOG_NOTICE, "seek failure on %s: %m", dp->device);
|
||||
close(dp->fd);
|
||||
dp->fd = -1;
|
||||
}
|
||||
return (n);
|
||||
}
|
||||
|
||||
/*
|
||||
* Attempt to get a list of all disk names using the `kern.disks' sysctl
|
||||
* variable. An NULL terminated list of pointers to these disks' pathnames
|
||||
* is returned.
|
||||
*/
|
||||
char **
|
||||
getdisknames(void) {
|
||||
char *string, *field;
|
||||
size_t size, numdisks;
|
||||
char **disks;
|
||||
|
||||
if (sysctlbyname("kern.disks", NULL, &size, NULL, 0) != 0 &&
|
||||
errno != ENOMEM) {
|
||||
syslog(LOG_NOTICE, "sysctl kern.disks failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if ((string = malloc(size)) == NULL) {
|
||||
syslog(LOG_NOTICE, "malloc failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (sysctlbyname("kern.disks", string, &size, NULL, 0) != 0) {
|
||||
syslog(LOG_NOTICE, "sysctl kern.disks failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
|
||||
disks = NULL;
|
||||
numdisks = 0;
|
||||
while ((field = strsep(&string, " ")) != NULL) {
|
||||
if ((disks = reallocf(disks,
|
||||
(numdisks + 1) * sizeof (*disks))) == NULL) {
|
||||
syslog(LOG_NOTICE, "reallocf failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
if (asprintf(&disks[numdisks],
|
||||
"%s%s", _PATH_DEV, field) < 0) {
|
||||
syslog(LOG_NOTICE, "asprintf failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
numdisks++;
|
||||
}
|
||||
|
||||
if ((disks = reallocf(disks,
|
||||
(numdisks + 1) * sizeof (*disks))) == NULL) {
|
||||
syslog(LOG_NOTICE, "strdup failure: %m");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
disks[numdisks] = NULL;
|
||||
|
||||
free(string);
|
||||
return (disks);
|
||||
}
|
||||
|
||||
void
|
||||
sighup(int sig) {
|
||||
|
||||
sig = sig;
|
||||
got_sighup = 1;
|
||||
}
|
||||
|
||||
void
|
||||
sigterm(int sig) {
|
||||
|
||||
sig = sig;
|
||||
got_sigterm = 1;
|
||||
}
|
||||
|
||||
void
|
||||
usage(void) {
|
||||
|
||||
fprintf(stderr,
|
||||
"usage: diskcheckd [-d] [-f conf_file] [-o save_file]\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
Loading…
Reference in New Issue
Block a user