Add diskcheck-daemon.

With a small disk being 20GB these days, chances are pretty good that
an ailing sector will not be read while still being recoverable by
the drive.

Diskcheck daemon will read disks in the background at a low rate and
that way give the diskdrive a chance to detect and correct soft read
errors before they become hard errors.

Idea by:	phk
Written by:     ben
This commit is contained in:
Poul-Henning Kamp 2001-06-03 20:02:03 +00:00
parent 2d754db7e6
commit 849f35cf88
6 changed files with 1046 additions and 0 deletions

18
etc/diskcheckd.conf Normal file
View File

@ -0,0 +1,18 @@
# $FreeBSD$
#
# Configuration file for diskcheckd, refer to the diskcheckd.conf(5) manual
# page for further information.
#
# The device name may be specified as "*", in which case all devices are
# checked.
#
# The size may be specified as "*", in which case the size is automatically
# determined from the disklabel.
#
# One of either "Days" or "Rate" must be specified, to specify a complete
# scan should happen once every n days, or that scanning should take place
# at n KB/s. Note that the "Days" value will not cause a scan exactly every
# n days since diskcheckd will always read in power-of-2 size blocks.
#
# Device Size Days Rate
* * 28 *

View File

@ -1518,6 +1518,20 @@ is set to
these are the flags to pass to the
.Xr sendmail 8
daemon.
.It Ar diskcheckd_enable
(bool) Set to
.Ar YES
if you want to start
.Xr diskcheckd 8
at system boot time.
.It Ar diskcheckd_flags
If
.Ar diskcheckd_enable
is set to
.Ar YES ,
these are the flags to pass to the
.Xr diskcheckd 8
daemon.
.It Va dumpdev
.Pq Vt str
If not set to

View File

@ -24,6 +24,7 @@ SUBDIR= IPXrouted \
dev_mkdb \
devinfo \
digictl \
diskcheckd \
diskpart \
edquota \
extattrctl \

View File

@ -0,0 +1,12 @@
# $FreeBSD$
CFLAGS+= -W -Wall -g -O0
PROG= diskcheckd
MAN8= diskcheckd.8
MLINKS= diskcheckd.8 diskcheckd.conf.5
BINDIR?= /usr/sbin
.include <bsd.prog.mk>

View File

@ -0,0 +1,186 @@
.\" Copyright (c) 2000, 2001 Ben Smithurst <ben@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd December 30, 2000
.Dt DISKCHECKD 8
.Os FreeBSD
.Sh NAME
.Nm diskcheckd
.Nd daemon to check for disk read errors
.Sh SYNOPSIS
.Nm
.Op Fl d
.Op Fl f Ar conf_file
.Op Fl o Ar save_file
.Sh DESCRIPTION
.Nm
is a daemon which runs in the background,
reading entire disks to find any read errors on those disks.
The disks which should be scanned,
and the rates at which they should be scanned,
must be specified in the configuration file,
which is
.Pa /etc/diskcheckd.conf
by default.
.Pp
Any blank lines or lines starting with a
.Ql #
character in this file are ignored.
Each non-comment line of this file should contain four white space separated
fields,
which are the full pathname of the disk device,
the size of that disk,
the frequency in days at which to check that disk,
and the rate in kilobytes per second at which to check this disk.
Naturally,
it would be contradictory to specify both the frequency and the rate,
so only one of these should be specified.
Additionally,
the size of the disk should not be specified if the rate is specified,
as this information is unnecessary.
.Pp
If the disk is specified as
.Dq * ,
then
.Nm
will apply the given settings to all disks in the system,
obtained using the
.Va kern.disks
sysctl variable.
If the size is specified as
.Dq *
(recommended),
then the size of the disk will be automatically determined from the
disklabel,
if possible.
Fields which are not specified should contain a single
.Dq *
character.
.Pp
Note that any rate specified will be rounded to the nearest power of 2,
and will be forced into the range 512 bytes to 128 kilobytes,
inclusive.
Any rate calculated from the frequency and size information will be rounded
in the same way,
so if you specify the third field (days for complete scan) it is unlikely
that a complete scan will actually take exactly this many days.
.Pp
To run
.Nm
automatically at boot time,
the
.Ar diskcheckd_enable
variable in
.Xr rc.conf 5
should be set to
.Dq YES .
.Pp
When
.Nm
receives a
.Dv SIGTERM
or
.Dv SIGINT
signal,
it saves its current state information to a file,
so that after a reboot
.Nm
can resume reading from where it left off,
rather than starting from the beginning of the disk again.
The information saved to this file consists of the device filename and the
current offset into that device.
.Pp
.Nm
can be instructed to reload the configuration file by sending it a
.Dv SIGHUP
signal.
.Pp
.Nm
accepts the following command line options:
.Pp
.Bl -tag -width Fl
.It Fl d
If this flag is specified,
.Nm
will not fork into the background and detach from its controlling terminal
to become a daemon.
This flag is primarily used for debugging.
.It Fl f
Specify the configuration file to use,
instead of the default
.Pa /etc/diskcheckd.conf .
.It Fl o
Specify the file to save disk offsets to,
instead of the default
.Pa /var/db/diskcheckd.offsets .
.El
.Sh FILES
.Bl -tag -width /var/db/diskcheckd.offsets -compact
.It Pa /etc/diskcheckd.conf
Default configuration file.
.It Pa /var/db/diskcheckd.offsets
Default location of saved offsets.
.El
.Sh EXAMPLES
To check all of
.Pa /dev/ad0
for errors once every two weeks,
use this entry in
.Pa diskcheckd.conf :
.Pp
.Bd -literal -offset indent
/dev/ad0 * 14 *
.Ed
.Pp
To check the first SCSI disk for errors at approximately 64KB/s,
use the following entry:
.Pp
.Bd -literal -offset indent
/dev/da0 * * 64
.Ed
.Pp
To check all disks once every four weeks:
.Pp
.Bd -literal -offset indent
* * 28 *
.Ed
.Sh DIAGNOSTICS
If any errors occur,
they will be written to
.Xr syslogd 8 .
.Sh HISTORY
.Nm
first appeared in
.Fx 5.0 .
.Sh AUTHORS
.Nm
and this manual page were written by
.An Ben Smithurst Aq ben@FreeBSD.org ,
with input from
.An Poul-Henning Kamp Aq phk@FreeBSD.org .
.Sh BUGS
.Nm
assumes all disks have 512 byte sectors.

View File

@ -0,0 +1,815 @@
/*-
* Copyright (c) 2000, 2001 Ben Smithurst <ben@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
static const char rcsid[] =
"$FreeBSD$";
#include <sys/types.h>
#include <sys/sysctl.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <paths.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <time.h>
#include <unistd.h>
#define DKTYPENAMES
#include <sys/disklabel.h>
#define _PATH_CONF "/etc/diskcheckd.conf"
#define _PATH_SAVE _PATH_VARDB"diskcheckd.offsets"
#define MAXRATE (128 << 10)
#define MINRATE 512
struct disk {
int fd;
char *device;
off_t size;
int secsize;
int days, rate, errors;
};
volatile sig_atomic_t got_sighup = 0, got_sigterm = 0;
char **getdisknames(void);
off_t dseek(struct disk *, off_t, int);
struct disk *readconf(const char *);
void getdisksize(struct disk *);
void logreaderror(struct disk *, int);
void readchunk(struct disk *, char *);
void readoffsets(struct disk *, const char *);
void setdiskrate(struct disk *);
void sighup(int);
void sigterm(int);
void updateproctitle(struct disk *);
void usage(void);
void writeoffsets(struct disk *, const char *);
int
main(int argc, char *argv[]) {
char *buf;
struct disk *disks, *dp;
int ch, ok;
struct sigaction sa;
int counter, debug;
const char *conf_file, *save_file;
conf_file = _PATH_CONF;
save_file = _PATH_SAVE;
debug = 0;
while ((ch = getopt(argc, argv, "df:o:")) != -1)
switch (ch) {
case 'd':
debug = 1;
break;
case 'f':
conf_file = optarg;
break;
case 'o':
save_file = optarg;
break;
default:
usage();
/* NOTREACHED */
}
argv += optind;
argc -= optind;
if (argc != 0)
usage();
openlog("diskcheckd", LOG_CONS|LOG_PID, LOG_DAEMON);
if (!debug && daemon(0, 0) < 0) {
syslog(LOG_NOTICE, "daemon failure: %m");
exit(EXIT_FAILURE);
}
sa.sa_handler = sigterm;
sa.sa_flags = SA_RESTART;
sigemptyset(&sa.sa_mask);
sigaction(SIGTERM, &sa, NULL);
sigaction(SIGINT, &sa, NULL);
sa.sa_handler = sighup;
sigaction(SIGHUP, &sa, NULL);
/* Read the configuration file and the saved offsets */
disks = readconf(conf_file);
readoffsets(disks, save_file);
if ((buf = malloc(MAXRATE)) == NULL) {
syslog(LOG_NOTICE, "malloc failure: %m");
exit(EXIT_FAILURE);
}
/* The main disk checking loop. */
counter = 0;
while (!got_sigterm) {
ok = 0;
for (dp = disks; dp->device != NULL; dp++)
if (dp->fd != -1) {
ok = 1;
readchunk(dp, buf);
}
if (!ok) {
syslog(LOG_EMERG, "all disks had read errors");
exit(EXIT_FAILURE);
}
if (counter % 300 == 0) {
updateproctitle(disks);
writeoffsets(disks, save_file);
}
counter++;
sleep(1);
if (got_sighup) {
/*
* Got a SIGHUP, so save the offsets, free the
* memory used for the disk structures, and then
* re-read the config file and the disk offsets.
*/
writeoffsets(disks, save_file);
for (dp = disks; dp->device != NULL; dp++) {
free(dp->device);
close(dp->fd);
}
free(disks);
disks = readconf(conf_file);
readoffsets(disks, save_file);
got_sighup = 0;
}
}
writeoffsets(disks, save_file);
return (EXIT_SUCCESS);
}
/*
* Read the next chunk from the specified disk, retrying if necessary.
*/
void
readchunk(struct disk *dp, char *buf) {
ssize_t n;
int s;
n = read(dp->fd, buf, dp->rate);
if (n == 0) {
eof:
syslog(LOG_INFO, "reached end of %s with %d errors",
dp->device, dp->errors);
dseek(dp, 0, SEEK_SET);
dp->errors = 0;
return;
} else if (n > 0)
return;
/*
* Read error, retry in smaller chunks.
*/
logreaderror(dp, dp->rate);
for (s = 0; s < dp->rate; s += 512) {
n = read(dp->fd, buf, 512);
if (n == 0)
goto eof;
else if (n < 0) {
/* log the error and seek past it. */
logreaderror(dp, 512);
dseek(dp, 512, SEEK_CUR);
}
}
}
const char *
fstypename(u_int8_t type) {
static char buf[32];
if (type < FSMAXTYPES)
return (fstypenames[type]);
else {
snprintf(buf, sizeof buf, "%u", type);
return (buf);
}
}
/*
* Report a read error, logging how many bytes were trying to be read, which
* sector they were being read from, and try to also find out what that sector
* is used for.
*/
void
logreaderror(struct disk *dp, int nbytes) {
quad_t secno;
off_t saved_offset;
int fd, slice, part;
struct dos_partition *dos;
struct disklabel label;
char buf[512];
char newdev[512];
saved_offset = dseek(dp, 0, SEEK_CUR);
secno = (quad_t)saved_offset / dp->secsize;
dp->errors++;
syslog(LOG_NOTICE, "error reading %d bytes from sector %qd on %s",
nbytes, secno, dp->device);
/*
* First, find out which slice it's in. To do this, we seek to the
* start of the disk, read the first sector, and go through the DOS
* slice table.
*/
if (dseek(dp, 0, SEEK_SET) == -1)
exit(EXIT_FAILURE);
if (read(dp->fd, buf, sizeof buf) != sizeof buf) {
dseek(dp, saved_offset, SEEK_SET);
return;
}
/* seek back to where we were */
if (dseek(dp, saved_offset, SEEK_SET) == -1)
exit(EXIT_FAILURE);
dos = (struct dos_partition *)&buf[DOSPARTOFF];
for (slice = 0; slice < NDOSPART; slice++)
if (dos[slice].dp_start <= secno &&
secno < dos[slice].dp_start + dos[slice].dp_size)
break;
if (slice == NDOSPART) {
syslog(LOG_NOTICE,
"sector %qd on %s doesn't appear "
"to be within any DOS slice", secno, dp->device);
return;
}
/* Make secno relative to this slice */
secno -= dos[slice].dp_start;
snprintf(newdev, sizeof newdev, "%ss%d", dp->device, slice + 1);
syslog(LOG_DEBUG, "bad sector seems to be within %s", newdev);
/* Check the type of that partition. */
if (dos[slice].dp_typ != DOSPTYP_386BSD) {
/* If not a BSD slice, we can't do much more. */
syslog(LOG_NOTICE, "last bad sector is sector %qd "
"on device %s, type %02x", secno, newdev,
dos[slice].dp_typ);
return;
}
if ((fd = open(newdev, O_RDONLY)) < 0) {
syslog(LOG_NOTICE, "open %s failure: %m", newdev);
return;
}
/* Try to read the disklabel from that device. */
if (ioctl(fd, DIOCGDINFO, &label) < 0) {
syslog(LOG_NOTICE, "DIOCGDINFO on %s failed: %m",
newdev);
return;
}
/* Check which partition this sector is in. */
for (part = 0; part < MAXPARTITIONS; part++)
if (part != 2 && /* skip 'c' partition */
label.d_partitions[part].p_offset <= secno &&
secno < label.d_partitions[part].p_offset +
label.d_partitions[part].p_size)
break;
if (part == MAXPARTITIONS) {
syslog(LOG_NOTICE,
"sector %qd on %s doesn't appear "
"to be within any BSD partition", secno, newdev);
return;
}
secno -= label.d_partitions[part].p_offset;
snprintf(newdev, sizeof newdev, "%ss%d%c",
dp->device, slice + 1, 'a' + part);
syslog(LOG_DEBUG, "bad sector seems to be within %s", newdev);
if (label.d_partitions[part].p_fstype != FS_BSDFFS) {
/* Again, if not a BSD partition, can't do much. */
syslog(LOG_NOTICE, "last bad sector is sector %qd "
"on device %s, type %s", secno, newdev,
fstypename(label.d_partitions[part].p_fstype));
return;
}
syslog(LOG_NOTICE, "last bad sector is sector %qd "
"on 4.2BSD filesystem %s", secno, newdev);
/*
* XXX: todo: find out which file on the BSD filesystem uses this
* sector...
*/
}
/*
* Read the offsets written by writeoffsets().
*/
void
readoffsets(struct disk *disks, const char *save_file) {
FILE *fp;
struct disk *dp;
char *space, buf[1024];
if ((fp = fopen(save_file, "r")) == NULL) {
if (errno != ENOENT)
syslog(LOG_NOTICE, "open %s failed: %m", save_file);
return;
}
while (fgets(buf, sizeof buf, fp) != NULL) {
if ((space = strchr(buf, ' ')) == NULL)
continue;
*space = '\0';
for (dp = disks;
dp->device != NULL && strcmp(dp->device, buf) != 0; dp++)
; /* nothing */
if (dp->device != NULL)
dseek(dp, (off_t)strtoq(space + 1, NULL, 0), SEEK_SET);
}
fclose(fp);
}
/*
* Save the offsets we've reached for each disk to a file, so we can start
* at that position next time the daemon is started.
*/
void
writeoffsets(struct disk *disks, const char *save_file) {
FILE *fp;
struct disk *dp;
if ((fp = fopen(save_file, "w")) == NULL) {
syslog(LOG_NOTICE, "open %s failed: %m", save_file);
return;
}
for (dp = disks; dp->device != NULL; dp++)
if (strcmp(dp->device, "*") != 0)
fprintf(fp, "%s %qd\n", dp->device,
(quad_t)dseek(dp, 0, SEEK_CUR));
fclose(fp);
}
/*
* Set the process title so it's easy to see using ps(1) how much has been
* done.
*/
void
updateproctitle(struct disk *disks) {
struct disk *dp;
char *bp, *p;
static char *buf;
static size_t bufsize;
size_t size;
int inc, ret;
double percent;
bp = buf;
size = bufsize;
for (dp = disks; dp->device != NULL; dp++) {
p = dp->device;
if (strcmp(p, "*") == 0)
continue;
if (strncmp(p, _PATH_DEV, sizeof _PATH_DEV - 1) == 0)
p += sizeof _PATH_DEV - 1;
percent = 100 * (double)dseek(dp, 0, SEEK_CUR) / dp->size;
if ((size_t)(ret = snprintf(bp, size,
"%s %.2f%%, ", p, percent)) >= size) {
inc = ((ret + 1023) >> 10) << 10;
size += inc;
bufsize += inc;
if ((buf = reallocf(buf, bufsize)) == NULL) {
/* Not fatal. */
syslog(LOG_NOTICE, "reallocf failure: %m");
bufsize = 0;
return;
}
bp = buf + bufsize - size;
ret = snprintf(bp, size, "%s %.2f%%, ", p, percent);
}
bp += ret;
size -= ret;
}
if (buf != NULL) {
/* Remove the trailing comma. */
if (&bp[-2] >= buf)
bp[-2] = '\0';
setproctitle("%s", buf);
}
}
/* used to keep track of which fields have been specified */
#define FL_SIZE_SPEC 1
#define FL_DAYS_SPEC 2
#define FL_RATE_SPEC 4
/*
* Read the configuration file, set the rate appropriately for each disk,
* and get a file descriptor for each disk.
*/
struct disk *
readconf(const char *conf_file) {
FILE *fp;
char buf[1024], *line, *field, *ep, **np, **np0;
int fields, flags;
struct disk *disks, *dp, *odisks;
int numdisks, onumdisks;
double dval;
long lval;
int linenum;
if ((fp = fopen(conf_file, "r")) == NULL) {
syslog(LOG_NOTICE, "open %s failure: %m", conf_file);
exit(EXIT_FAILURE);
}
numdisks = 0;
disks = NULL;
linenum = 0;
/* Step 1: read and parse the configuration file. */
while (fgets(buf, sizeof buf, fp) != NULL) {
line = buf;
linenum++;
while (isspace(*line))
line++;
if (*line == '#' || *line == '\n' || *line == '\0')
continue;
fields = flags = 0;
while ((field = strsep(&line, " \t\n")) != NULL) {
if (*field == '\0')
continue;
/*
* If this is the first field on a line, allocate
* space for one more disk structure.
*/
if (fields == 0 && (disks = reallocf(disks,
(numdisks + 1) * sizeof (*disks))) == NULL) {
syslog(LOG_NOTICE, "reallocf failure: %m");
exit(EXIT_FAILURE);
}
dp = &disks[numdisks];
switch (fields++) {
case 0:
/* device name */
if ((dp->device = strdup(field)) == NULL) {
syslog(LOG_NOTICE,
"strdup failure: %m");
exit(EXIT_FAILURE);
}
dp->fd = -1;
dp->rate = -1;
dp->size = -1;
break;
case 1:
/* size */
if (strcmp(field, "*") == 0)
break;
flags |= FL_SIZE_SPEC;
dval = strtod(field, &ep);
if (dval < 0) {
syslog(LOG_NOTICE,
"%s:%d: size cannot be negative",
conf_file, linenum);
exit(EXIT_FAILURE);
}
if (strcasecmp(ep, "M") == 0)
dp->size = dval * 1024 * 1024;
else if (strcasecmp(ep, "G") == 0)
dp->size = dval * 1024 * 1024 * 1024;
else if (*ep == '\0')
dp->size = dval;
else {
syslog(LOG_NOTICE,
"%s:%d: bad suffix \"%s\" on "
"size \"%s\"", conf_file,
linenum, ep, field);
exit(EXIT_FAILURE);
}
break;
case 2:
/* days */
if (strcmp(field, "*") == 0)
break;
flags |= FL_DAYS_SPEC;
lval = strtol(field, &ep, 0);
if (ep == field || *ep != '\0' ||
lval <= 0) {
syslog(LOG_NOTICE,
"%s:%d: bad number of days",
conf_file, linenum);
exit(EXIT_FAILURE);
}
dp->days = lval;
break;
case 3:
/* rate */
if (strcmp(field, "*") == 0)
break;
flags |= FL_RATE_SPEC;
lval = strtol(field, &ep, 0);
if (ep == field || *ep != '\0' ||
lval <= 0) {
syslog(LOG_NOTICE, "%s:%d: bad rate",
conf_file, linenum);
exit(EXIT_FAILURE);
}
dp->rate = lval * 1024;
break;
default:
/* Report error at end of line. */
break;
}
}
if (fields != 4) {
syslog(LOG_NOTICE,
"%s:%d: %d fields, should be 4",
conf_file, linenum, fields);
exit(EXIT_FAILURE);
}
if (flags != (FL_SIZE_SPEC|FL_DAYS_SPEC) &&
flags != FL_DAYS_SPEC &&
flags != FL_RATE_SPEC) {
syslog(LOG_NOTICE,
"%s:%d: should specify frequency "
"or rate, not both", conf_file,
linenum);
exit(EXIT_FAILURE);
}
numdisks++;
}
fclose(fp);
if (numdisks == 0) {
syslog(LOG_NOTICE, "no disks specified");
exit(EXIT_FAILURE);
}
/*
* Step 2: expand any dp->device == "*" entries. onumdisks is used
* so that we don't keep checking dp->device for the new disks as
* they're added, since that's pointless.
*/
onumdisks = numdisks;
for (dp = disks; dp < disks + onumdisks; dp++) {
if (strcmp(dp->device, "*") == 0) {
for (np = np0 = getdisknames(); *np != NULL; np++) {
odisks = disks;
if ((disks = reallocf(disks,
(numdisks + 1) * sizeof (*disks))) == NULL) {
syslog(LOG_NOTICE,
"reallocf failure: %m");
exit(EXIT_FAILURE);
}
/* "disks" pointer may have changed. */
dp = disks + (dp - odisks);
disks[numdisks].rate = dp->rate;
disks[numdisks].size = dp->size;
disks[numdisks].days = dp->days;
disks[numdisks].device = *np;
numdisks++;
}
/* Don't free the individual pointers, since they're
* used as new dp->device entries.
*/
free(np0);
}
}
/* Step 3: open all the disks and set the rate to check at
* appropriately. Use "onumdisks" because numdisks changes within
* the loop.
*/
onumdisks = numdisks;
for (dp = disks; dp < disks + onumdisks; dp++) {
if (strcmp(dp->device, "*") == 0) {
numdisks--;
continue;
}
if ((dp->fd = open(dp->device, O_RDONLY)) < 0) {
syslog(LOG_NOTICE, "open %s failed: %m", dp->device);
numdisks--;
continue;
}
dp->errors = 0;
/*
* Set the rate appropriately. We read in blocks of this
* size, so make it a power of 2 as close as possible to the
* specified/calculated rate, and make it in the range
* MINRATE..MAXRATE.
*/
if (dp->size < 0)
getdisksize(dp);
if (dp->rate < 0)
dp->rate = dp->size / (dp->days * 86400);
setdiskrate(dp);
}
if (numdisks == 0) {
syslog(LOG_NOTICE, "no disks usable");
exit(EXIT_FAILURE);
}
/* Add a final entry with dp->device == NULL to end the array. */
if ((disks = reallocf(disks,
(onumdisks + 1) * sizeof (*disks))) == NULL) {
syslog(LOG_NOTICE, "reallocf failure: %m");
exit(EXIT_FAILURE);
}
disks[onumdisks].device = NULL;
return (disks);
}
/*
* Read in the specified disk's size from the disklabel.
*/
void
getdisksize(struct disk *dp) {
struct disklabel label;
if (ioctl(dp->fd, DIOCGDINFO, &label) < 0) {
syslog(LOG_NOTICE, "DIOCGDINFO on %s failed: %m",
dp->device);
exit(EXIT_FAILURE);
}
dp->secsize = label.d_secsize;
dp->size = (off_t)label.d_secperunit * label.d_secsize;
if (label.d_secsize != 512)
syslog(LOG_NOTICE,
"%s has %d byte sectors, may cause minor problems",
dp->device, label.d_secsize);
}
/*
* Find the nearest power of 2 to the calculated or specified rate, limited
* to somewhere between MINRATE and MAXRATE.
*/
void
setdiskrate(struct disk *dp) {
int s, r;
int above, below;
if (dp->rate <= MINRATE)
dp->rate = MINRATE;
else if (dp->rate >= MAXRATE)
dp->rate = MAXRATE;
else {
for (r = dp->rate, s = 0; r != 0; s++)
r >>= 1;
/*
* "above" and "below" are the closest power-of-2 numbers to the
* calculated rate.
*/
above = 1 << s;
below = 1 << (s - 1);
if (above - dp->rate < dp->rate - below)
dp->rate = above;
else
dp->rate = below;
}
}
off_t
dseek(struct disk *dp, off_t offset, int whence) {
off_t n;
if ((n = lseek(dp->fd, offset, whence)) == -1) {
syslog(LOG_NOTICE, "seek failure on %s: %m", dp->device);
close(dp->fd);
dp->fd = -1;
}
return (n);
}
/*
* Attempt to get a list of all disk names using the `kern.disks' sysctl
* variable. An NULL terminated list of pointers to these disks' pathnames
* is returned.
*/
char **
getdisknames(void) {
char *string, *field;
size_t size, numdisks;
char **disks;
if (sysctlbyname("kern.disks", NULL, &size, NULL, 0) != 0 &&
errno != ENOMEM) {
syslog(LOG_NOTICE, "sysctl kern.disks failure: %m");
exit(EXIT_FAILURE);
}
if ((string = malloc(size)) == NULL) {
syslog(LOG_NOTICE, "malloc failure: %m");
exit(EXIT_FAILURE);
}
if (sysctlbyname("kern.disks", string, &size, NULL, 0) != 0) {
syslog(LOG_NOTICE, "sysctl kern.disks failure: %m");
exit(EXIT_FAILURE);
}
disks = NULL;
numdisks = 0;
while ((field = strsep(&string, " ")) != NULL) {
if ((disks = reallocf(disks,
(numdisks + 1) * sizeof (*disks))) == NULL) {
syslog(LOG_NOTICE, "reallocf failure: %m");
exit(EXIT_FAILURE);
}
if (asprintf(&disks[numdisks],
"%s%s", _PATH_DEV, field) < 0) {
syslog(LOG_NOTICE, "asprintf failure: %m");
exit(EXIT_FAILURE);
}
numdisks++;
}
if ((disks = reallocf(disks,
(numdisks + 1) * sizeof (*disks))) == NULL) {
syslog(LOG_NOTICE, "strdup failure: %m");
exit(EXIT_FAILURE);
}
disks[numdisks] = NULL;
free(string);
return (disks);
}
void
sighup(int sig) {
sig = sig;
got_sighup = 1;
}
void
sigterm(int sig) {
sig = sig;
got_sigterm = 1;
}
void
usage(void) {
fprintf(stderr,
"usage: diskcheckd [-d] [-f conf_file] [-o save_file]\n");
exit(EXIT_FAILURE);
}