Please welcome HAST - Highly Available Storage.

HAST allows to transparently store data on two physically separated machines
connected over the TCP/IP network. HAST works in Primary-Secondary
(Master-Backup, Master-Slave) configuration, which means that only one of the
cluster nodes can be active at any given time. Only Primary node is able to
handle I/O requests to HAST-managed devices. Currently HAST is limited to two
cluster nodes in total.

HAST operates on block level - it provides disk-like devices in /dev/hast/
directory for use by file systems and/or applications. Working on block level
makes it transparent for file systems and applications. There is no difference
between using HAST-provided device and raw disk, partition, etc. All of them
are just regular GEOM providers in FreeBSD.

For more information please consult hastd(8), hastctl(8) and hast.conf(5)
manual pages, as well as http://wiki.FreeBSD.org/HAST.

Sponsored by:	FreeBSD Foundation
Sponsored by:	OMCnet Internet Service GmbH
Sponsored by:	TransIP BV
This commit is contained in:
pjd 2010-02-18 23:16:19 +00:00
parent a448fe30c9
commit 1c1e2e8b71
56 changed files with 11571 additions and 77 deletions

View File

@ -260,6 +260,9 @@ syslogd_flags="-s" # Flags to syslogd (if enabled).
inetd_enable="NO" # Run the network daemon dispatcher (YES/NO).
inetd_program="/usr/sbin/inetd" # path to inetd, if you want a different one.
inetd_flags="-wW -C 60" # Optional flags to inetd
hastd_enable="NO" # Run the HAST daemon (YES/NO).
hastd_program="/sbin/hastd" # path to hastd, if you want a different one.
hastd_flags="" # Optional flags to hastd.
#
# named. It may be possible to run named in a sandbox, man security for
# details.

View File

@ -12,7 +12,7 @@ FILES= DAEMON FILESYSTEMS LOGIN NETWORKING SERVERS \
encswap \
faith fsck ftp-proxy ftpd \
gbde geli geli2 gssd \
hcsecd \
hastd hcsecd \
hostapd hostid hostid_save hostname \
inetd initrandom \
ip6addrctl ipfilter ipfs ipfw ipmon \

31
etc/rc.d/hastd Normal file
View File

@ -0,0 +1,31 @@
#!/bin/sh
#
# $FreeBSD$
#
# PROVIDE: hastd
# REQUIRE: NETWORKING syslogd
# BEFORE: DAEMON
# rc.d script controlling the HAST daemon (hastd) through rc.subr.
. /etc/rc.subr
name="hastd"
rcvar=`set_rcvar`
pidfile="/var/run/${name}.pid"
command="/sbin/${name}"
# Control utility invoked by the stop pre-command below.
hastctl="/sbin/hastctl"
required_files="/etc/hast.conf"
stop_precmd="hastd_stop_precmd"
required_modules="geom_gate:g_gate"
# NOTE(review): the three variables below refer to syslogd sockets and named,
# and set_socketlist is not defined in this script; they look like leftovers
# from another rc.d script -- confirm whether hastd needs them.
sockfile="/var/run/syslogd.sockets"
evalargs="rc_flags=\"\`set_socketlist\` \$rc_flags\""
altlog_proglist="named"
# Run before the daemon is stopped: switch all resources to the init role.
hastd_stop_precmd()
{
${hastctl} role init all
}
load_rc_config $name
run_rc_command "$1"

View File

@ -36,6 +36,8 @@ SUBDIR= adjkerntz \
ggate \
growfs \
gvinum \
hastctl \
hastd \
ifconfig \
init \
${_ipf} \

View File

@ -59,7 +59,7 @@ enum { UNSET, CREATE, DESTROY, LIST, RESCUE } action = UNSET;
static const char *path = NULL;
static const char *host = NULL;
static int unit = -1;
static int unit = G_GATE_UNIT_AUTO;
static unsigned flags = 0;
static int force = 0;
static unsigned queue_size = G_GATE_QUEUE_SIZE;

View File

@ -50,7 +50,7 @@
enum { UNSET, CREATE, DESTROY, LIST, RESCUE } action = UNSET;
static const char *path = NULL;
static int unit = -1;
static int unit = G_GATE_UNIT_AUTO;
static unsigned flags = 0;
static int force = 0;
static unsigned queue_size = G_GATE_QUEUE_SIZE;

36
sbin/hastctl/Makefile Normal file
View File

@ -0,0 +1,36 @@
# $FreeBSD$
# Build rules for the hastctl(8) utility.
.include <bsd.own.mk>
# Also search the hastd directory for source files.
.PATH: ${.CURDIR}/../hastd
PROG= hastctl
SRCS= activemap.c
SRCS+= ebuf.c
SRCS+= hast_proto.c hastctl.c
SRCS+= metadata.c
SRCS+= nv.c
SRCS+= parse.y pjdlog.c
SRCS+= proto.c proto_common.c proto_tcp4.c proto_uds.c
SRCS+= token.l
SRCS+= subr.c
SRCS+= y.tab.h
WARNS?= 6
MAN= hastctl.8
# Headers are taken from the hastd directory as well.
CFLAGS+=-I${.CURDIR}/../hastd
CFLAGS+=-DINET
# INET6 is defined unless IPv6 support is disabled in the build.
.if ${MK_INET6_SUPPORT} != "no"
CFLAGS+=-DINET6
.endif
# This is needed to have WARNS > 1.
CFLAGS+=-DYY_NO_UNPUT
DPADD= ${LIBCRYPTO} ${LIBL}
LDADD= -lcrypto -ll
YFLAGS+=-v
# Files generated by yacc (y.output comes from -v above).
CLEANFILES=y.tab.c y.tab.h y.output
.include <bsd.prog.mk>

217
sbin/hastctl/hastctl.8 Normal file
View File

@ -0,0 +1,217 @@
.\" Copyright (c) 2010 The FreeBSD Foundation
.\" All rights reserved.
.\"
.\" This software was developed by Pawel Jakub Dawidek under sponsorship from
.\" the FreeBSD Foundation.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd February 1, 2010
.Dt HASTCTL 8
.Os
.Sh NAME
.Nm hastctl
.Nd "Highly Available Storage control utility"
.Sh SYNOPSIS
.Nm
.Cm create
.Op Fl d
.Op Fl c Ar config
.Op Fl e Ar extentsize
.Op Fl k Ar keepdirty
.Op Fl m Ar mediasize
.Ar name ...
.Nm
.Cm role
.Op Fl d
.Op Fl c Ar config
.Aq init | primary | secondary
.Ar all | name ...
.Nm
.Cm status
.Op Fl d
.Op Fl c Ar config
.Op Ar all | name ...
.Nm
.Cm dump
.Op Fl d
.Op Fl c Ar config
.Op Ar all | name ...
.Sh DESCRIPTION
The
.Nm
utility is used to control the behaviour of the
.Xr hastd 8
daemon.
.Pp
This utility should be used by HA software like
.Nm heartbeat
or
.Nm ucarp
to setup HAST resources role when changing from primary mode to
secondary or vice versa.
Be aware that if a file system like UFS exists on HAST provider and
primary node dies, file system has to be checked for inconsistencies
with the
.Xr fsck 8
utility after switching secondary node to primary role.
.Pp
The first argument to
.Nm
indicates an action to be performed:
.Bl -tag -width ".Cm create"
.It Cm create
Initialize local provider configured for the given resource.
Additional options include:
.Bl -tag -width ".Fl e Ar extentsize"
.It Fl e Ar extentsize
Size of an extent.
Extent is a block which is used for synchronization.
.Nm
maintains a map of dirty extents and extent is the smallest region that
can be marked as dirty.
If any part of an extent is modified, entire extent will be synchronized
when nodes connect.
If extent size is too small, there will be too much disk activity
related to dirty map updates, which will degrade performance of the
given resource.
If extent size is too large, synchronization, even in case of short
outage, can take a long time increasing the risk of losing up-to-date
node before synchronization process is completed.
The default extent size is
.Va 2MB .
.It Fl k Ar keepdirty
Maximum number of dirty extents to keep dirty all the time.
Most recently used extents are kept dirty to reduce number of metadata
updates.
The default number of most recently used extents which will be kept
dirty is
.Va 64 .
.It Fl m Ar mediasize
Size of the smaller provider used as backend storage on both nodes.
This option can be omitted if node providers have the same size on both
sides.
.El
.It Cm role
Change role of the given resource.
The role can be one of:
.Bl -tag -width ".Cm secondary"
.It Cm init
Resource is turned off.
.It Cm primary
Local
.Xr hastd 8
daemon will act as primary node for the given resource.
System on which resource role is set to primary can use
.Pa /dev/hast/<name>
GEOM provider.
.It Cm secondary
Local
.Xr hastd 8
daemon will act as secondary node for the given resource - it will wait
for connection from the primary node and will handle I/O requests
received from it.
GEOM provider
.Pa /dev/hast/<name>
will not be created on secondary node.
.El
.It Cm status
Present status of the configured resources.
.It Cm dump
Dump metadata stored on local component for the configured resources.
.El
.Pp
In addition, every subcommand can be followed by the following options:
.Bl -tag -width ".Fl c Ar config"
.It Fl c Ar config
Specify alternative location of the configuration file.
The default location is
.Pa /etc/hast.conf .
.It Fl d
Print debugging information.
This option can be specified multiple times to raise the verbosity
level.
.El
.Sh EXIT STATUS
Exit status is 0 on success, or one of the values described in
.Xr sysexits 3
on failure.
.Sh EXAMPLES
Initialize HAST provider, create file system on it and mount it.
.Bd -literal -offset indent
nodeB# hastctl create shared
nodeB# hastd
nodeB# hastctl role secondary shared
nodeA# hastctl create shared
nodeA# hastd
nodeA# hastctl role primary shared
nodeA# newfs -U /dev/hast/shared
nodeA# mount -o noatime /dev/hast/shared /shared
nodeA# application_start
.Ed
.Pp
Switch roles for the
.Nm shared
HAST resource.
.Bd -literal -offset indent
nodeA# application_stop
nodeA# umount -f /shared
nodeA# hastctl role secondary shared
nodeB# hastctl role primary shared
nodeB# fsck -t ufs /dev/hast/shared
nodeB# mount -o noatime /dev/hast/shared /shared
nodeB# application_start
.Ed
.Sh FILES
.Bl -tag -width ".Pa /var/run/hastctl" -compact
.It Pa /etc/hast.conf
Configuration file for
.Nm
and
.Xr hastd 8 .
.It Pa /var/run/hastctl
Control socket used by
.Nm
to communicate with the
.Xr hastd 8
daemon.
.El
.Sh SEE ALSO
.Xr sysexits 3 ,
.Xr geom 4 ,
.Xr hast.conf 5 ,
.Xr fsck 8 ,
.Xr ggatec 8 ,
.Xr ggatel 8 ,
.Xr hastd 8 ,
.Xr mount 8 ,
.Xr newfs 8 .
.Sh AUTHORS
The
.Nm
was developed by
.An Pawel Jakub Dawidek Aq pjd@FreeBSD.org
under sponsorship of the FreeBSD Foundation.

526
sbin/hastctl/hastctl.c Normal file
View File

@ -0,0 +1,526 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/disk.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <inttypes.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <activemap.h>
#include "hast.h"
#include "hast_proto.h"
#include "metadata.h"
#include "nv.h"
#include "pjdlog.h"
#include "proto.h"
#include "subr.h"
/* Path to configuration file; defaults to HAST_CONFIG, overridden by -c. */
static const char *cfgpath = HAST_CONFIG;
/* Hastd configuration, as returned by yy_config_parse() in main(). */
static struct hastd_config *cfg;
/* Control connection. */
static struct proto_conn *controlconn;
/* Subcommand selected by the first command-line argument. */
enum {
CMD_INVALID,
CMD_CREATE,
CMD_ROLE,
CMD_STATUS,
CMD_DUMP
};
/*
 * Print the synopsis of every subcommand to stderr and terminate the
 * process with EX_USAGE.  This function does not return.
 */
static __dead2 void
usage(void)
{
	const char *prog;

	prog = getprogname();

	fprintf(stderr,
	    "usage: %s create [-d] [-c config] [-e extentsize] [-k keepdirty]\n"
	    "\t\t[-m mediasize] name ...\n",
	    prog);
	fprintf(stderr,
	    " %s role [-d] [-c config] <init | primary | secondary> all | name ...\n",
	    prog);
	fprintf(stderr,
	    " %s status [-d] [-c config] [all | name ...]\n",
	    prog);
	fprintf(stderr,
	    " %s dump [-d] [-c config] [all | name ...]\n",
	    prog);

	exit(EX_USAGE);
}
/*
 * Initialize the local provider of the given resource: write fresh metadata
 * and an all-zero (no dirty extents) bitmap right after the metadata area.
 *
 * mediasize, extentsize and keepdirty come from the command line; a value of
 * 0 selects the local provider size, HAST_EXTENTSIZE and HAST_KEEPDIRTY
 * respectively.
 *
 * Returns 0 on success or a sysexits(3) code on failure.  The provider
 * descriptor is closed (when open) and the log prefix is reset before
 * returning.
 */
static int
create_one(struct hast_resource *res, intmax_t mediasize, intmax_t extentsize,
    intmax_t keepdirty)
{
	unsigned char *buf;
	size_t mapsize;
	int ec;

	buf = NULL;
	ec = 0;
	pjdlog_prefix_set("[%s] ", res->hr_name);

	if (provinfo(res, true) < 0) {
		ec = EX_NOINPUT;
		goto end;
	}
	if (mediasize == 0)
		mediasize = res->hr_local_mediasize;
	else if (mediasize > res->hr_local_mediasize) {
		pjdlog_error("Provided mediasize is larger than provider %s size.",
		    res->hr_localpath);
		ec = EX_DATAERR;
		goto end;
	}
	/* Also rejects a negative size given on the command line. */
	if (mediasize <= (intmax_t)METADATA_SIZE) {
		pjdlog_error("Media size (%jd) is too small to hold metadata.",
		    (intmax_t)mediasize);
		ec = EX_DATAERR;
		goto end;
	}
	if (!powerof2(res->hr_local_sectorsize)) {
		pjdlog_error("Sector size of provider %s is not power of 2 (%u).",
		    res->hr_localpath, res->hr_local_sectorsize);
		ec = EX_DATAERR;
		goto end;
	}
	if (extentsize == 0)
		extentsize = HAST_EXTENTSIZE;
	if (extentsize < res->hr_local_sectorsize) {
		pjdlog_error("Extent size (%jd) is less than sector size (%u).",
		    (intmax_t)extentsize, res->hr_local_sectorsize);
		ec = EX_DATAERR;
		goto end;
	}
	if ((extentsize % res->hr_local_sectorsize) != 0) {
		pjdlog_error("Extent size (%jd) is not multiple of sector size (%u).",
		    (intmax_t)extentsize, res->hr_local_sectorsize);
		ec = EX_DATAERR;
		goto end;
	}
	/*
	 * The activemap code asserts that the extent size is a power of 2,
	 * so refuse to store anything else in the metadata.
	 */
	if (!powerof2(extentsize)) {
		pjdlog_error("Extent size (%jd) is not power of 2.",
		    (intmax_t)extentsize);
		ec = EX_DATAERR;
		goto end;
	}
	if (keepdirty == 0)
		keepdirty = HAST_KEEPDIRTY;
	if (keepdirty < 0) {
		pjdlog_error("Number of extents to keep dirty (%jd) is negative.",
		    (intmax_t)keepdirty);
		ec = EX_DATAERR;
		goto end;
	}
	mapsize = activemap_calc_ondisk_size(mediasize - METADATA_SIZE,
	    extentsize, res->hr_local_sectorsize);
	/* There has to be room for data after the metadata and the bitmap. */
	if ((intmax_t)mapsize >= mediasize - (intmax_t)METADATA_SIZE) {
		pjdlog_error("Media size (%jd) is too small to hold the bitmap.",
		    (intmax_t)mediasize);
		ec = EX_DATAERR;
		goto end;
	}
	res->hr_datasize = mediasize - METADATA_SIZE - mapsize;
	res->hr_extentsize = extentsize;
	res->hr_keepdirty = keepdirty;
	res->hr_localoff = METADATA_SIZE + mapsize;

	if (metadata_write(res) < 0) {
		ec = EX_IOERR;
		goto end;
	}
	/* A zero-filled bitmap marks every extent as clean. */
	buf = calloc(1, mapsize);
	if (buf == NULL) {
		pjdlog_error("Unable to allocate %zu bytes of memory for initial bitmap.",
		    mapsize);
		ec = EX_TEMPFAIL;
		goto end;
	}
	if (pwrite(res->hr_localfd, buf, mapsize, METADATA_SIZE) !=
	    (ssize_t)mapsize) {
		pjdlog_errno(LOG_ERR, "Unable to store initial bitmap on %s",
		    res->hr_localpath);
		ec = EX_IOERR;
		goto end;
	}
end:
	free(buf);
	if (res->hr_localfd >= 0)
		close(res->hr_localfd);
	pjdlog_prefix_set("%s", "");
	return (ec);
}
/*
 * Handle the "create" subcommand: run create_one() for every resource named
 * in argv and exit with the first non-zero status seen (0 if all succeeded).
 * A name that matches no configured resource is reported and counted as
 * EX_DATAERR.  This function does not return.
 */
static void
control_create(int argc, char *argv[], intmax_t mediasize, intmax_t extentsize,
    intmax_t keepdirty)
{
	struct hast_resource *res;
	const char *name;
	int status, idx, rc;

	/* At least one resource name is required. */
	if (argc < 1)
		usage();

	status = 0;
	for (idx = 0; idx < argc; idx++) {
		name = argv[idx];
		/* res is NULL after the loop when nothing matched. */
		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
			if (strcmp(name, res->hr_name) == 0)
				break;
		}
		if (res != NULL) {
			rc = create_one(res, mediasize, extentsize, keepdirty);
		} else {
			pjdlog_error("Unknown resource %s.", name);
			rc = EX_DATAERR;
		}
		/* Only the first failure decides the exit status. */
		if (rc != 0 && status == 0)
			status = rc;
	}
	exit(status);
}
static int
dump_one(struct hast_resource *res)
{
int ret;
ret = metadata_read(res, false);
if (ret != 0)
return (ret);
printf("resource: %s\n", res->hr_name);
printf(" datasize: %ju\n", (uintmax_t)res->hr_datasize);
printf(" extentsize: %d\n", res->hr_extentsize);
printf(" keepdirty: %d\n", res->hr_keepdirty);
printf(" localoff: %ju\n", (uintmax_t)res->hr_localoff);
printf(" resuid: %ju\n", (uintmax_t)res->hr_resuid);
printf(" localcnt: %ju\n", (uintmax_t)res->hr_primary_localcnt);
printf(" remotecnt: %ju\n", (uintmax_t)res->hr_primary_remotecnt);
printf(" prevrole: %s\n", role2str(res->hr_previous_role));
return (0);
}
/*
 * Handle the "dump" subcommand: print metadata of every configured resource
 * (no arguments, or the single argument "all") or of the resources named in
 * argv.  Exits with the first non-zero status seen; an unknown resource name
 * counts as EX_DATAERR.  This function does not return.
 */
static void
control_dump(int argc, char *argv[])
{
	struct hast_resource *res;
	int status, idx, rc;

	status = 0;

	if (argc == 0 || (argc == 1 && strcmp(argv[0], "all") == 0)) {
		/* Dump everything that is configured. */
		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
			rc = dump_one(res);
			if (rc != 0 && status == 0)
				status = rc;
		}
		exit(status);
	}

	for (idx = 0; idx < argc; idx++) {
		/* res is NULL after the loop when nothing matched. */
		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
			if (strcmp(argv[idx], res->hr_name) == 0)
				break;
		}
		if (res != NULL) {
			rc = dump_one(res);
		} else {
			pjdlog_error("Unknown resource %s.", argv[idx]);
			rc = EX_DATAERR;
		}
		if (rc != 0 && status == 0)
			status = rc;
	}
	exit(status);
}
/*
 * Walk the per-resource entries ("resource%u", "error%u", "role%u") of a
 * reply to the set-role command and log the outcome for each resource.
 * Returns the first non-zero per-resource error, or 0 if there was none.
 */
static int
control_set_role(struct nv *nv, const char *newrole)
{
	const char *name, *prevrole;
	unsigned int idx;
	int status, rc;

	status = 0;
	/* The list ends at the first index without a "resource%u" entry. */
	for (idx = 0;
	    (name = nv_get_string(nv, "resource%u", idx)) != NULL; idx++) {
		pjdlog_prefix_set("[%s] ", name);
		rc = nv_get_int16(nv, "error%u", idx);
		if (rc != 0) {
			if (status == 0)
				status = rc;
			pjdlog_warning("Received error %d from hastd.", rc);
			continue;
		}
		prevrole = nv_get_string(nv, "role%u", idx);
		if (strcmp(prevrole, newrole) != 0) {
			pjdlog_debug(1, "Role changed from %s to %s.",
			    prevrole, newrole);
		} else {
			pjdlog_debug(2, "Role unchanged (%s).", prevrole);
		}
	}
	pjdlog_prefix_set("%s", "");
	return (status);
}
/*
 * Print the per-resource entries of a reply to the status command.
 * For a resource that carries a non-zero "error%u" only the error is
 * printed.  Returns the first non-zero per-resource error, or 0.
 */
static int
control_status(struct nv *nv)
{
	const char *name, *state;
	unsigned int idx;
	int status, rc;

	status = 0;
	/* The list ends at the first index without a "resource%u" entry. */
	for (idx = 0;
	    (name = nv_get_string(nv, "resource%u", idx)) != NULL; idx++) {
		printf("%s:\n", name);
		rc = nv_get_int16(nv, "error%u", idx);
		if (rc != 0) {
			if (status == 0)
				status = rc;
			printf(" error: %d\n", rc);
			continue;
		}
		printf(" role: %s\n", nv_get_string(nv, "role%u", idx));
		printf(" provname: %s\n",
		    nv_get_string(nv, "provname%u", idx));
		printf(" localpath: %s\n",
		    nv_get_string(nv, "localpath%u", idx));
		printf(" extentsize: %u\n",
		    (unsigned int)nv_get_uint32(nv, "extentsize%u", idx));
		printf(" keepdirty: %u\n",
		    (unsigned int)nv_get_uint32(nv, "keepdirty%u", idx));
		printf(" remoteaddr: %s\n",
		    nv_get_string(nv, "remoteaddr%u", idx));
		printf(" replication: %s\n",
		    nv_get_string(nv, "replication%u", idx));
		/* The status line is printed only when the entry is present. */
		state = nv_get_string(nv, "status%u", idx);
		if (state != NULL)
			printf(" status: %s\n", state);
		printf(" dirty: %ju bytes\n",
		    (uintmax_t)nv_get_uint64(nv, "dirty%u", idx));
	}
	return (status);
}
/*
 * Convert a string to intmax_t using strtoimax() with base 0 (so decimal,
 * octal "0..." and hexadecimal "0x..." forms are accepted).
 *
 * The whole string has to be consumed and at least one digit has to be
 * converted; an empty string is rejected explicitly because not every C
 * library reports "no conversion" through errno.
 *
 * Returns 0 and stores the value in *nump on success, leaving errno as it
 * was on entry.  Returns -1 with errno set (EINVAL for malformed input, or
 * whatever strtoimax() reported, e.g. ERANGE) on failure; *nump is then
 * left untouched.
 */
static int
numfromstr(const char *str, intmax_t *nump)
{
	intmax_t num;
	char *suffix;
	int rerrno;

	rerrno = errno;
	errno = 0;
	num = strtoimax(str, &suffix, 0);
	/* suffix == str means that no digits were found at all. */
	if (errno == 0 && (suffix == str || *suffix != '\0'))
		errno = EINVAL;
	if (errno != 0)
		return (-1);
	*nump = num;
	errno = rerrno;
	return (0);
}
/*
 * Entry point of hastctl.
 *
 * argv[1] selects the subcommand (create, role, status or dump), which in
 * turn selects the set of accepted options.  "create" and "dump" are handled
 * locally and exit from within their handlers.  "role" and "status" build a
 * request, send it to hastd over the control connection and process the
 * reply; the process exit status is the value returned by the reply handler.
 */
int
main(int argc, char *argv[])
{
struct nv *nv;
intmax_t mediasize, extentsize, keepdirty;
int cmd, debug, error, ii;
const char *optstr;
debug = 0;
/* 0 means "not given on the command line". */
mediasize = extentsize = keepdirty = 0;
if (argc == 1)
usage();
/* Pick the subcommand and the option string getopt() will accept. */
if (strcmp(argv[1], "create") == 0) {
cmd = CMD_CREATE;
optstr = "c:de:k:m:h";
} else if (strcmp(argv[1], "role") == 0) {
cmd = CMD_ROLE;
optstr = "c:dh";
} else if (strcmp(argv[1], "status") == 0) {
cmd = CMD_STATUS;
optstr = "c:dh";
} else if (strcmp(argv[1], "dump") == 0) {
cmd = CMD_DUMP;
optstr = "c:dh";
} else
usage();
/* Skip the program name so that getopt() starts after the subcommand. */
argc--;
argv++;
for (;;) {
int ch;
ch = getopt(argc, argv, optstr);
if (ch == -1)
break;
switch (ch) {
case 'c':
cfgpath = optarg;
break;
case 'd':
debug++;
break;
case 'e':
if (numfromstr(optarg, &extentsize) < 0)
err(1, "Invalid extentsize");
break;
case 'k':
if (numfromstr(optarg, &keepdirty) < 0)
err(1, "Invalid keepdirty");
break;
case 'm':
if (numfromstr(optarg, &mediasize) < 0)
err(1, "Invalid mediasize");
break;
case 'h':
default:
usage();
}
}
argc -= optind;
argv += optind;
/* These two subcommands need at least one positional argument. */
switch (cmd) {
case CMD_CREATE:
case CMD_ROLE:
if (argc == 0)
usage();
break;
}
pjdlog_debug_set(debug);
cfg = yy_config_parse(cfgpath);
assert(cfg != NULL);
switch (cmd) {
case CMD_CREATE:
control_create(argc, argv, mediasize, extentsize, keepdirty);
/* NOTREACHED */
assert(!"What are we doing here?!");
break;
case CMD_DUMP:
/* Dump metadata from local component of the given resource. */
control_dump(argc, argv);
/* NOTREACHED */
assert(!"What are we doing here?!");
break;
case CMD_ROLE:
/* Change role for the given resources. */
/* argv[0] is the role, followed by at least one resource name. */
if (argc < 2)
usage();
nv = nv_alloc();
nv_add_uint8(nv, HASTCTL_CMD_SETROLE, "cmd");
if (strcmp(argv[0], "init") == 0)
nv_add_uint8(nv, HAST_ROLE_INIT, "role");
else if (strcmp(argv[0], "primary") == 0)
nv_add_uint8(nv, HAST_ROLE_PRIMARY, "role");
else if (strcmp(argv[0], "secondary") == 0)
nv_add_uint8(nv, HAST_ROLE_SECONDARY, "role");
else
usage();
/* Resource names are numbered from 0, skipping the role argument. */
for (ii = 0; ii < argc - 1; ii++)
nv_add_string(nv, argv[ii + 1], "resource%d", ii);
break;
case CMD_STATUS:
/* Obtain status of the given resources. */
nv = nv_alloc();
nv_add_uint8(nv, HASTCTL_CMD_STATUS, "cmd");
/* Without arguments ask about "all" resources. */
if (argc == 0)
nv_add_string(nv, "all", "resource%d", 0);
else {
for (ii = 0; ii < argc; ii++)
nv_add_string(nv, argv[ii], "resource%d", ii);
}
break;
default:
assert(!"Impossible role!");
}
/* Setup control connection... */
if (proto_client(cfg->hc_controladdr, &controlconn) < 0) {
pjdlog_exit(EX_OSERR,
"Unable to setup control connection to %s",
cfg->hc_controladdr);
}
/* ...and connect to hastd. */
if (proto_connect(controlconn) < 0) {
pjdlog_exit(EX_OSERR, "Unable to connect to hastd via %s",
cfg->hc_controladdr);
}
/* Send the command to the server... */
if (hast_proto_send(NULL, controlconn, nv, NULL, 0) < 0) {
pjdlog_exit(EX_UNAVAILABLE,
"Unable to send command to hastd via %s",
cfg->hc_controladdr);
}
/* The request is no longer needed; nv is reused for the reply below. */
nv_free(nv);
/* ...and receive reply. */
if (hast_proto_recv(NULL, controlconn, &nv, NULL, 0) < 0) {
pjdlog_exit(EX_UNAVAILABLE,
"cannot receive reply from hastd via %s",
cfg->hc_controladdr);
}
/* A non-zero top-level "error" in the reply is fatal. */
error = nv_get_int16(nv, "error");
if (error != 0) {
pjdlog_exitx(EX_SOFTWARE, "Error %d received from hastd.",
error);
}
nv_set_error(nv, 0);
/* Only the two daemon-backed subcommands can get here. */
switch (cmd) {
case CMD_ROLE:
error = control_set_role(nv, argv[0]);
break;
case CMD_STATUS:
error = control_status(nv);
break;
default:
assert(!"Impossible role!");
}
exit(error);
}

37
sbin/hastd/Makefile Normal file
View File

@ -0,0 +1,37 @@
# $FreeBSD$
# Build rules for the hastd(8) daemon.
.include <bsd.own.mk>
PROG= hastd
SRCS= activemap.c
SRCS+= control.c
SRCS+= ebuf.c
SRCS+= hast_proto.c hastd.c hooks.c
SRCS+= metadata.c
SRCS+= nv.c
SRCS+= secondary.c
SRCS+= parse.y pjdlog.c primary.c
SRCS+= proto.c proto_common.c proto_socketpair.c proto_tcp4.c proto_uds.c
SRCS+= rangelock.c
SRCS+= subr.c
SRCS+= token.l
SRCS+= y.tab.h
WARNS?= 6
MAN= hastd.8 hast.conf.5
CFLAGS+=-I${.CURDIR}
CFLAGS+=-DINET
# INET6 is defined unless IPv6 support is disabled in the build.
.if ${MK_INET6_SUPPORT} != "no"
CFLAGS+=-DINET6
.endif
# This is needed to have WARNS > 1.
CFLAGS+=-DYY_NO_UNPUT
DPADD= ${LIBCRYPTO} ${LIBGEOM} ${LIBL} ${LIBPTHREAD} ${LIBUTIL}
LDADD= -lcrypto -lgeom -ll -lpthread -lutil
YFLAGS+=-v
# Files generated by yacc (y.output comes from -v above).
CLEANFILES=y.tab.c y.tab.h y.output
.include <bsd.prog.mk>

691
sbin/hastd/activemap.c Normal file
View File

@ -0,0 +1,691 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h> /* powerof2() */
#include <sys/queue.h>
#include <assert.h>
#include <bitstring.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <activemap.h>
/* Value stored in am_magic while the structure is initialized and valid. */
#define ACTIVEMAP_MAGIC 0xac71e4
/*
 * State used to track which extents of a medium are dirty: per-extent
 * counters of pending writes, the in-memory and on-disk bitmaps, the
 * synchronization bitmap and the list of extents that are kept dirty.
 */
struct activemap {
int am_magic; /* Magic value. */
off_t am_mediasize; /* Media size in bytes. */
uint32_t am_extentsize; /* Extent size in bytes,
must be power of 2. */
uint8_t am_extentshift;/* 2 ^ am_extentshift == am_extentsize */
int am_nextents; /* Number of extents. */
size_t am_mapsize; /* Bitmap size in bytes. */
uint16_t *am_memtab; /* An array that holds number of pending
writes per extent. */
bitstr_t *am_diskmap; /* On-disk bitmap of dirty extents. */
bitstr_t *am_memmap; /* In-memory bitmap of dirty extents. */
size_t am_diskmapsize; /* Map size rounded up to sector size. */
uint64_t am_ndirty; /* Number of dirty regions. */
bitstr_t *am_syncmap; /* Bitmap of extents to sync. */
off_t am_syncoff; /* Next synchronization offset. */
TAILQ_HEAD(skeepdirty, keepdirty) am_keepdirty; /* List of extents that
we keep dirty to reduce bitmap
updates. */
int am_nkeepdirty; /* Number of am_keepdirty elements. */
int am_nkeepdirty_limit; /* Maximum number of am_keepdirty
elements. */
};
/* One element of the am_keepdirty list. */
struct keepdirty {
int kd_extent; /* Extent number. */
TAILQ_ENTRY(keepdirty) kd_next; /* Linkage on am_keepdirty. */
};
/*
 * Return the number of bits set in x.
 * Used to derive the extent shift from (extentsize - 1).
 */
static uint32_t
bitcount32(uint32_t x)
{
	uint32_t nbits;

	/* Each iteration clears the lowest bit that is still set. */
	for (nbits = 0; x != 0; nbits++)
		x &= x - 1;
	return (nbits);
}
/*
 * Translate a byte offset within the medium into the number of the extent
 * that contains it.  Both the offset and the result are range-checked with
 * assertions.
 */
static __inline int
off2ext(const struct activemap *amp, off_t offset)
{
	int idx;

	assert(offset >= 0 && offset < amp->am_mediasize);
	idx = (int)(offset >> amp->am_extentshift);
	assert(idx >= 0 && idx < amp->am_nextents);

	return (idx);
}
/*
 * Translate an extent number into the byte offset at which the extent
 * starts.  Both the extent and the result are range-checked with assertions.
 */
static __inline off_t
ext2off(const struct activemap *amp, int extent)
{
	off_t start;

	assert(extent >= 0 && extent < amp->am_nextents);
	/* Widen before shifting so the result is computed as off_t. */
	start = (off_t)extent;
	start <<= amp->am_extentshift;
	assert(start >= 0 && start < amp->am_mediasize);

	return (start);
}
/*
 * Return the number of requests, each covering at most MAXPHYS bytes, that
 * are needed to synchronize the given extent.  The last extent may be
 * shorter than am_extentsize when the media size is not a multiple of it.
 */
static __inline int
ext2reqs(const struct activemap *amp, int ext)
{
	off_t tail;

	if (ext >= amp->am_nextents - 1) {
		assert(ext == amp->am_nextents - 1);
		/* Size of the last extent; a remainder of 0 means full size. */
		tail = amp->am_mediasize % amp->am_extentsize;
		if (tail == 0)
			tail = amp->am_extentsize;
		return (((tail - 1) / MAXPHYS) + 1);
	}
	return (((amp->am_extentsize - 1) / MAXPHYS) + 1);
}
/*
 * Allocate and initialize an activemap for a medium of the given size.
 *
 * extentsize and sectorsize must be non-zero powers of 2, and mediasize and
 * keepdirty must be greater than 0 (all checked with assertions).
 *
 * On success 0 is returned and the new map is stored in *ampp.  If any
 * allocation fails, everything allocated so far is released and -1 is
 * returned; *ampp is left untouched in that case.
 */
int
activemap_init(struct activemap **ampp, uint64_t mediasize, uint32_t extentsize,
    uint32_t sectorsize, uint32_t keepdirty)
{
	struct activemap *amp;

	assert(ampp != NULL);
	assert(mediasize > 0);
	assert(extentsize > 0);
	assert(powerof2(extentsize));
	assert(sectorsize > 0);
	assert(powerof2(sectorsize));
	assert(keepdirty > 0);

	amp = malloc(sizeof(*amp));
	if (amp == NULL)
		return (-1);

	/* Geometry of the map. */
	amp->am_mediasize = mediasize;
	amp->am_nkeepdirty_limit = keepdirty;
	amp->am_extentsize = extentsize;
	/* For a power of 2, the bits set in (size - 1) give log2(size). */
	amp->am_extentshift = bitcount32(extentsize - 1);
	amp->am_nextents = ((mediasize - 1) / extentsize) + 1;
	amp->am_mapsize = sizeof(bitstr_t) * bitstr_size(amp->am_nextents);
	amp->am_diskmapsize = roundup2(amp->am_mapsize, sectorsize);

	/* Bookkeeping starts out empty. */
	amp->am_ndirty = 0;
	amp->am_syncoff = -2;
	TAILQ_INIT(&amp->am_keepdirty);
	amp->am_nkeepdirty = 0;

	/* Try all allocations first, then check the results together. */
	amp->am_memtab = calloc(amp->am_nextents, sizeof(amp->am_memtab[0]));
	amp->am_diskmap = calloc(1, amp->am_diskmapsize);
	amp->am_memmap = bit_alloc(amp->am_nextents);
	amp->am_syncmap = bit_alloc(amp->am_nextents);

	if (amp->am_memtab != NULL && amp->am_diskmap != NULL &&
	    amp->am_memmap != NULL && amp->am_syncmap != NULL) {
		amp->am_magic = ACTIVEMAP_MAGIC;
		*ampp = amp;
		return (0);
	}

	/* At least one allocation failed; free(NULL) is harmless. */
	free(amp->am_memtab);
	free(amp->am_diskmap);
	free(amp->am_memmap);
	free(amp->am_syncmap);
	amp->am_magic = 0;
	free(amp);
	errno = ENOMEM;
	return (-1);
}
/*
 * Look up the keep-dirty list element that refers to the given extent.
 * Returns the element, or NULL if the extent is not on the list.
 */
static struct keepdirty *
keepdirty_find(struct activemap *amp, int extent)
{
	struct keepdirty *cur;

	TAILQ_FOREACH(cur, &amp->am_keepdirty, kd_next) {
		if (cur->kd_extent == extent)
			return (cur);
	}
	return (NULL);
}
/*
 * Record the given extent as the most recently used one on the keep-dirty
 * list.
 *
 * If the extent is already on the list it is only moved to the head.
 * Otherwise, when the list has reached am_nkeepdirty_limit elements, the
 * element at the tail is removed and reused for the new extent; if there is
 * still room a new element is allocated.  Allocation failure is ignored:
 * the extent is then simply not added.
 */
static void
keepdirty_add(struct activemap *amp, int extent)
{
	struct keepdirty *kd;

	kd = keepdirty_find(amp, extent);
	if (kd != NULL) {
		/*
		 * Only move element at the beginning.
		 */
		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
		TAILQ_INSERT_HEAD(&amp->am_keepdirty, kd, kd_next);
		return;
	}
	/*
	 * Add new element, but first remove the most unused one if
	 * we have too many.
	 */
	if (amp->am_nkeepdirty >= amp->am_nkeepdirty_limit) {
		kd = TAILQ_LAST(&amp->am_keepdirty, skeepdirty);
		assert(kd != NULL);
		TAILQ_REMOVE(&amp->am_keepdirty, kd, kd_next);
		amp->am_nkeepdirty--;
		/*
		 * The list legitimately becomes empty here when the limit
		 * is 1, so only a negative count indicates corruption.
		 */
		assert(amp->am_nkeepdirty >= 0);
	}
	/* kd is still NULL here only if nothing was evicted above. */
	if (kd == NULL)
		kd = malloc(sizeof(*kd));
	/* We can ignore allocation failure. */
	if (kd != NULL) {
		kd->kd_extent = extent;
		amp->am_nkeepdirty++;
		TAILQ_INSERT_HEAD(&amp->am_keepdirty, kd, kd_next);
	}
}
/*
 * Set the bit of every extent found on the keep-dirty list in the on-disk
 * bitmap (am_diskmap).
 */
static void
keepdirty_fill(struct activemap *amp)
{
	struct keepdirty *cur;

	for (cur = TAILQ_FIRST(&amp->am_keepdirty); cur != NULL;
	    cur = TAILQ_NEXT(cur, kd_next)) {
		bit_set(amp->am_diskmap, cur->kd_extent);
	}
}
/*
 * Remove and free every element of the keep-dirty list.
 * am_nkeepdirty is expected to be 0 afterwards.
 */
static void
keepdirty_free(struct activemap *amp)
{
	struct keepdirty *head;

	while (!TAILQ_EMPTY(&amp->am_keepdirty)) {
		head = TAILQ_FIRST(&amp->am_keepdirty);
		TAILQ_REMOVE(&amp->am_keepdirty, head, kd_next);
		amp->am_nkeepdirty--;
		free(head);
	}
	assert(amp->am_nkeepdirty == 0);
}
/*
 * Release the keep-dirty list, the pending-writes table and the three
 * bitmaps allocated by activemap_init(), and invalidate the magic value.
 *
 * NOTE(review): the activemap structure itself is not freed here; confirm
 * whether callers are expected to release it.
 */
void
activemap_free(struct activemap *amp)
{

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	/* Mark the map invalid before tearing it down. */
	amp->am_magic = 0;
	keepdirty_free(amp);

	free(amp->am_syncmap);
	free(amp->am_memmap);
	free(amp->am_diskmap);
	free(amp->am_memtab);
}
/*
 * Account for a write request that is about to be handled.
 *
 * For every extent touched by [offset, offset + length) the pending-writes
 * counter is incremented and the extent is moved to the head of the
 * keep-dirty list.  Returns true if at least one extent went from clean to
 * dirty in the in-memory bitmap, which tells the caller that the on-disk
 * metadata should be updated.
 */
bool
activemap_write_start(struct activemap *amp, off_t offset, off_t length)
{
	bool dirtied, was_idle;
	int cur, last;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);
	assert(length > 0);

	dirtied = false;
	cur = off2ext(amp, offset);
	last = off2ext(amp, offset + length - 1);
	for (; cur <= last; cur++) {
		/*
		 * An extent whose counter rises from 0 becomes dirty and has
		 * to be marked in the bitmap as well.
		 */
		was_idle = (amp->am_memtab[cur]++ == 0);
		if (was_idle) {
			assert(!bit_test(amp->am_memmap, cur));
			bit_set(amp->am_memmap, cur);
			amp->am_ndirty++;
			dirtied = true;
		}
		keepdirty_add(amp, cur);
	}
	return (dirtied);
}
/*
 * Account for a write request that has been confirmed.
 *
 * For every extent touched by [offset, offset + length) the pending-writes
 * counter is decremented.  Returns true if at least one extent became clean
 * in the in-memory bitmap, which tells the caller that the on-disk metadata
 * should be updated.
 */
bool
activemap_write_complete(struct activemap *amp, off_t offset, off_t length)
{
	bool cleaned;
	int cur, last;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);
	assert(length > 0);

	cleaned = false;
	cur = off2ext(amp, offset);
	last = off2ext(amp, offset + length - 1);
	for (; cur <= last; cur++) {
		assert(amp->am_memtab[cur] > 0);
		assert(bit_test(amp->am_memmap, cur));
		amp->am_memtab[cur]--;
		if (amp->am_memtab[cur] != 0)
			continue;
		/* Last pending write is done: the extent is clean again. */
		bit_clear(amp->am_memmap, cur);
		amp->am_ndirty--;
		cleaned = true;
	}
	return (cleaned);
}
/*
 * Account for the end of synchronization of one extent.
 *
 * The pending-writes counter of the extent is lowered by the number of
 * requests ext2reqs() reports for it.  Returns true if the counter reached
 * 0 and the extent was marked clean in the in-memory bitmap, which tells
 * the caller that the on-disk metadata should be updated.
 */
bool
activemap_extent_complete(struct activemap *amp, int extent)
{
	int nreqs;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);
	assert(extent >= 0 && extent < amp->am_nextents);

	nreqs = ext2reqs(amp, extent);
	assert(amp->am_memtab[extent] >= nreqs);
	amp->am_memtab[extent] -= nreqs;
	assert(bit_test(amp->am_memmap, extent));

	/* Other writes are still pending on this extent. */
	if (amp->am_memtab[extent] != 0)
		return (false);

	bit_clear(amp->am_memmap, extent);
	amp->am_ndirty--;
	return (true);
}
/*
 * Return the current count of dirty extents (am_ndirty).
 */
uint64_t
activemap_ndirty(const struct activemap *amp)
{
	uint64_t ndirty;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	ndirty = amp->am_ndirty;
	return (ndirty);
}
/*
 * Compare the on-disk bitmap with the in-memory bitmap.  Returns true when
 * they differ, i.e. when the bitmap should be flushed to disk.
 */
bool
activemap_differ(const struct activemap *amp)
{
	int cmp;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	cmp = memcmp(amp->am_diskmap, amp->am_memmap, amp->am_mapsize);
	return (cmp != 0);
}
/*
 * Return the number of bytes occupied by the bitmap (am_mapsize).
 */
size_t
activemap_size(const struct activemap *amp)
{
	size_t mapsize;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	mapsize = amp->am_mapsize;
	return (mapsize);
}
/*
 * Return the number of bytes needed to store the bitmap on disk.
 * This is activemap_size() rounded up to the sector size.
 */
size_t
activemap_ondisk_size(const struct activemap *amp)
{
	size_t ondisk;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	ondisk = amp->am_diskmapsize;
	return (ondisk);
}
/*
 * Load a bitmap that was read from disk into the internal bitmaps.
 *
 * The first am_mapsize bytes of buf are copied into the on-disk, in-memory
 * and synchronization bitmaps.  If any extent is dirty, the synchronization
 * offset is rewound and every dirty extent gets its pending-write counter set
 * to the number of requests needed to synchronize it, so it stays dirty until
 * synchronization completes.
 */
void
activemap_copyin(struct activemap *amp, const unsigned char *buf, size_t size)
{
	int first, extent;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);
	assert(size >= amp->am_mapsize);

	memcpy(amp->am_diskmap, buf, amp->am_mapsize);
	memcpy(amp->am_memmap, buf, amp->am_mapsize);
	memcpy(amp->am_syncmap, buf, amp->am_mapsize);

	bit_ffs(amp->am_memmap, amp->am_nextents, &first);
	if (first != -1) {
		/* Start synchronization from the first dirty extent. */
		activemap_sync_rewind(amp);

		/* Recount dirty extents and pin them until they are synced. */
		amp->am_ndirty = 0;
		extent = first;
		while (extent < amp->am_nextents) {
			if (bit_test(amp->am_memmap, extent)) {
				amp->am_memtab[extent] = ext2reqs(amp, extent);
				amp->am_ndirty++;
			}
			extent++;
		}
	}
	/* Otherwise there are no dirty extents and nothing more to do. */
}
/*
 * Merge the given (remote) bitmap into the existing one.
 *
 * Every extent that is dirty in buf but not yet marked in the local
 * synchronization bitmap is marked in all three local bitmaps, and its
 * pending-write counter is set to the number of requests needed to
 * synchronize one extent, so it stays dirty until synchronized.
 */
void
activemap_merge(struct activemap *amp, const unsigned char *buf, size_t size)
{
	bitstr_t *remote = __DECONST(bitstr_t *, buf);
	int extent;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);
	assert(size >= amp->am_mapsize);

	bit_ffs(remote, amp->am_nextents, &extent);
	if (extent == -1) {
		/* The remote bitmap has no dirty extents. */
		return;
	}

	while (extent < amp->am_nextents) {
		/* Act only on extents dirty remotely but not yet locally. */
		if (!bit_test(amp->am_syncmap, extent) &&
		    bit_test(remote, extent)) {
			bit_set(amp->am_syncmap, extent);
			bit_set(amp->am_memmap, extent);
			bit_set(amp->am_diskmap, extent);
			if (amp->am_memtab[extent] == 0)
				amp->am_ndirty++;
			amp->am_memtab[extent] = ext2reqs(amp, extent);
		}
		extent++;
	}

	/* Set synchronization offset to the first dirty extent. */
	activemap_sync_rewind(amp);
}
/*
 * Return a pointer to the internal bitmap that should be written to disk.
 *
 * The in-memory bitmap is copied over the on-disk bitmap and
 * keepdirty_fill() is applied before the pointer is handed out.  If sizep is
 * not NULL it receives the on-disk bitmap size (am_diskmapsize).
 */
const unsigned char *
activemap_bitmap(struct activemap *amp, size_t *sizep)
{
	const unsigned char *ondisk;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	if (sizep != NULL)
		*sizep = amp->am_diskmapsize;

	memcpy(amp->am_diskmap, amp->am_memmap, amp->am_mapsize);
	keepdirty_fill(amp);

	ondisk = (const unsigned char *)amp->am_diskmap;
	return (ondisk);
}
/*
 * Calculate how many bytes are needed to store the bitmap on disk for
 * a provider of the given media size: one bit per extent, rounded up to
 * a multiple of the sector size.  Both extentsize and sectorsize have to be
 * non-zero powers of two.
 */
size_t
activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize,
    uint32_t sectorsize)
{
	uint64_t extents, bytes;

	assert(mediasize > 0);
	assert(extentsize > 0);
	assert(powerof2(extentsize));
	assert(sectorsize > 0);
	assert(powerof2(sectorsize));

	/* Number of extents, counting a trailing partial extent as one. */
	extents = (mediasize - 1) / extentsize;
	extents++;

	bytes = sizeof(bitstr_t) * bitstr_size(extents);

	return (roundup2(bytes, sectorsize));
}
/*
 * Reset the synchronization offset.
 *
 * am_syncoff is set to -2 when the synchronization bitmap is empty (nothing
 * to synchronize) and to -1 otherwise, which requests that synchronization
 * start from the beginning, i.e. from the first dirty extent.
 */
void
activemap_sync_rewind(struct activemap *amp)
{
	int first;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	bit_ffs(amp->am_syncmap, amp->am_nextents, &first);
	amp->am_syncoff = (first == -1) ? -2 : -1;
}
/*
 * Return the next offset at which we should synchronize.
 *
 * am_syncoff acts as a cursor with two sentinel values: -2 means there is
 * nothing (left) to synchronize, -1 means the next chunk starts at the first
 * extent still marked in am_syncmap.
 *
 * On success the offset is returned and *lengthp receives the chunk length,
 * which is at most MAXPHYS and never crosses the end of the extent or of the
 * media.  When nothing is left to synchronize -1 is returned and *lengthp is
 * not modified.
 *
 * *syncextp is set to the number of the extent whose synchronization was
 * just finished by this call, or to -1 if no extent was finished.
 */
off_t
activemap_sync_offset(struct activemap *amp, off_t *lengthp, int *syncextp)
{
off_t syncoff, left;
int ext;
assert(amp->am_magic == ACTIVEMAP_MAGIC);
assert(lengthp != NULL);
assert(syncextp != NULL);
*syncextp = -1;
/* Nothing to synchronize. */
if (amp->am_syncoff == -2)
return (-1);
/*
 * The previously returned chunk was the last one of its extent if the
 * next chunk would start at or past the end of the media, or in a
 * different extent.
 */
if (amp->am_syncoff >= 0 &&
(amp->am_syncoff + MAXPHYS >= amp->am_mediasize ||
off2ext(amp, amp->am_syncoff) !=
off2ext(amp, amp->am_syncoff + MAXPHYS))) {
/*
* We are about to change extent, so mark previous one as clean.
*/
ext = off2ext(amp, amp->am_syncoff);
bit_clear(amp->am_syncmap, ext);
*syncextp = ext;
/* Force a search for the next extent below. */
amp->am_syncoff = -1;
}
if (amp->am_syncoff == -1) {
/*
* Let's find first extent to synchronize.
*/
bit_ffs(amp->am_syncmap, amp->am_nextents, &ext);
if (ext == -1) {
amp->am_syncoff = -2;
return (-1);
}
amp->am_syncoff = ext2off(amp, ext);
} else {
/*
* We don't change extent, so just increase offset.
*/
amp->am_syncoff += MAXPHYS;
if (amp->am_syncoff >= amp->am_mediasize) {
amp->am_syncoff = -2;
return (-1);
}
}
syncoff = amp->am_syncoff;
/* Bytes remaining from syncoff to the end of its extent. */
left = ext2off(amp, off2ext(amp, syncoff)) +
amp->am_extentsize - syncoff;
/* Clamp to the end of the media and to the maximum request size. */
if (syncoff + left > amp->am_mediasize)
left = amp->am_mediasize - syncoff;
if (left > MAXPHYS)
left = MAXPHYS;
assert(left >= 0 && left <= MAXPHYS);
assert(syncoff >= 0 && syncoff < amp->am_mediasize);
assert(syncoff + left >= 0 && syncoff + left <= amp->am_mediasize);
*lengthp = left;
return (syncoff);
}
/*
 * Mark the extent(s) containing the given region for synchronization.
 * Most likely one of the components is unavailable.
 *
 * Extents not yet present in the synchronization bitmap are added to it,
 * marked dirty in the in-memory bitmap if they were clean, and their
 * pending-write counter is raised by the number of requests needed to
 * synchronize one extent.  Returns true if any extent was newly marked.
 */
bool
activemap_need_sync(struct activemap *amp, off_t offset, off_t length)
{
	bool marked;
	int first, last, extent;

	assert(amp->am_magic == ACTIVEMAP_MAGIC);

	marked = false;
	first = off2ext(amp, offset);
	last = off2ext(amp, offset + length - 1);

	for (extent = first; extent <= last; extent++) {
		if (!bit_test(amp->am_syncmap, extent)) {
			bit_set(amp->am_syncmap, extent);
			if (!bit_test(amp->am_memmap, extent)) {
				bit_set(amp->am_memmap, extent);
				amp->am_ndirty++;
			}
			amp->am_memtab[extent] += ext2reqs(amp, extent);
			marked = true;
		} else {
			/* Already marked for synchronization. */
			assert(bit_test(amp->am_memmap, extent));
		}
	}

	return (marked);
}
/*
 * Debugging aid: print the in-memory (M), on-disk (D) and synchronization (S)
 * bitmaps to standard output, one line each, one digit per extent.
 */
void
activemap_dump(const struct activemap *amp)
{
	int ext;

	printf("M: ");
	ext = 0;
	while (ext < amp->am_nextents) {
		printf("%d", bit_test(amp->am_memmap, ext) ? 1 : 0);
		ext++;
	}
	printf("\n");

	printf("D: ");
	ext = 0;
	while (ext < amp->am_nextents) {
		printf("%d", bit_test(amp->am_diskmap, ext) ? 1 : 0);
		ext++;
	}
	printf("\n");

	printf("S: ");
	ext = 0;
	while (ext < amp->am_nextents) {
		printf("%d", bit_test(amp->am_syncmap, ext) ? 1 : 0);
		ext++;
	}
	printf("\n");
}

69
sbin/hastd/activemap.h Normal file
View File

@ -0,0 +1,69 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef	_ACTIVEMAP_H_
#define	_ACTIVEMAP_H_

/*
 * The prototypes below use off_t and size_t, so pull in <sys/types.h> to
 * keep this header usable without relying on the includer's include order.
 */
#include <sys/types.h>

#include <stdbool.h>
#include <stdint.h>

/* Opaque handle; the structure is private to activemap.c. */
struct activemap;

/* Construction and destruction. */
int activemap_init(struct activemap **ampp, uint64_t mediasize,
    uint32_t extentsize, uint32_t sectorsize, uint32_t keepdirty);
void activemap_free(struct activemap *amp);

/*
 * Write accounting.  Each function returns true if the on-disk metadata
 * should be updated as a result of the call.
 */
bool activemap_write_start(struct activemap *amp, off_t offset, off_t length);
bool activemap_write_complete(struct activemap *amp, off_t offset,
    off_t length);
bool activemap_extent_complete(struct activemap *amp, int extent);

/* Number of dirty extents. */
uint64_t activemap_ndirty(const struct activemap *amp);

/* True if the in-memory and on-disk bitmaps differ. */
bool activemap_differ(const struct activemap *amp);

/* Bitmap size in bytes, and the same rounded up to the sector size. */
size_t activemap_size(const struct activemap *amp);
size_t activemap_ondisk_size(const struct activemap *amp);

/* Load a bitmap read from disk / merge another bitmap into this one. */
void activemap_copyin(struct activemap *amp, const unsigned char *buf,
    size_t size);
void activemap_merge(struct activemap *amp, const unsigned char *buf,
    size_t size);

/* Bitmap to be written to disk; *sizep (if not NULL) gets its size. */
const unsigned char *activemap_bitmap(struct activemap *amp, size_t *sizep);

/* Size needed to store the bitmap on disk for the given geometry. */
size_t activemap_calc_ondisk_size(uint64_t mediasize, uint32_t extentsize,
    uint32_t sectorsize);

/* Synchronization cursor handling. */
void activemap_sync_rewind(struct activemap *amp);
off_t activemap_sync_offset(struct activemap *amp, off_t *lengthp,
    int *syncextp);
bool activemap_need_sync(struct activemap *amp, off_t offset, off_t length);

/* Print the bitmaps to standard output (debugging aid). */
void activemap_dump(const struct activemap *amp);

#endif	/* !_ACTIVEMAP_H_ */

426
sbin/hastd/control.c Normal file
View File

@ -0,0 +1,426 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <string.h>
#include "hast.h"
#include "hastd.h"
#include "hast_proto.h"
#include "nv.h"
#include "pjdlog.h"
#include "proto.h"
#include "subr.h"
#include "control.h"
/*
 * Change the role of a single resource and record the outcome in nvout.
 *
 * The resource is either given directly (res != NULL) or looked up by name
 * in cfg.  The resource name and its previous role are always reported under
 * "resource<no>" and "role<no>"; an unknown name is reported as
 * EHAST_NOENTRY under "error<no>".  When the role really changes, the worker
 * process of the previous role (if any) is terminated and a new one is
 * started if the new role is primary.
 */
static void
control_set_role(struct hastd_config *cfg, struct nv *nvout, uint8_t role,
    struct hast_resource *res, const char *name, unsigned int no)
{

	assert(cfg != NULL);
	assert(nvout != NULL);
	assert(name != NULL);

	/* Name is always needed. */
	nv_add_string(nvout, name, "resource%u", no);

	if (res == NULL) {
		struct hast_resource *cur;

		TAILQ_FOREACH(cur, &cfg->hc_resources, hr_next) {
			if (strcmp(cur->hr_name, name) == 0) {
				res = cur;
				break;
			}
		}
		if (res == NULL) {
			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
			return;
		}
	}
	assert(res != NULL);

	/* Send previous role back. */
	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);

	if (role == res->hr_role) {
		/* Nothing changed, return here. */
		return;
	}

	/* Log the change under the old role, then switch to the new one. */
	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));
	pjdlog_info("Role changed to %s.", role2str(role));
	res->hr_role = role;
	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));

	/*
	 * If previous role was primary or secondary we have to kill process
	 * doing that work.
	 */
	if (res->hr_workerpid != 0) {
		if (kill(res->hr_workerpid, SIGTERM) >= 0) {
			if (waitpid(res->hr_workerpid, NULL, 0) ==
			    res->hr_workerpid) {
				pjdlog_debug(1, "Worker process %u stopped.",
				    (unsigned int)res->hr_workerpid);
			} else {
				pjdlog_errno(LOG_WARNING,
				    "Error while waiting for worker process %u",
				    (unsigned int)res->hr_workerpid);
			}
		} else {
			pjdlog_errno(LOG_WARNING,
			    "Unable to kill worker process %u",
			    (unsigned int)res->hr_workerpid);
		}
		res->hr_workerpid = 0;
	}

	/* Start worker process if we are changing to primary. */
	if (role == HAST_ROLE_PRIMARY)
		hastd_primary(res);

	pjdlog_prefix_set("%s", "");
}
static void
control_status_worker(struct hast_resource *res, struct nv *nvout,
unsigned int no)
{
struct nv *cnvin, *cnvout;
const char *str;
int error;
cnvin = cnvout = NULL;
error = 0;
/*
* Prepare and send command to worker process.
*/
cnvout = nv_alloc();
nv_add_uint8(cnvout, HASTCTL_STATUS, "cmd");
error = nv_error(cnvout);
if (error != 0) {
/* LOG */
goto end;
}
if (hast_proto_send(res, res->hr_ctrl, cnvout, NULL, 0) < 0) {
error = errno;
/* LOG */
goto end;
}
/*
* Receive response.
*/
if (hast_proto_recv_hdr(res->hr_ctrl, &cnvin) < 0) {
error = errno;
/* LOG */
goto end;
}
error = nv_get_int64(cnvin, "error");
if (error != 0)
goto end;
if ((str = nv_get_string(cnvin, "status")) == NULL) {
error = ENOENT;
/* LOG */
goto end;
}
nv_add_string(nvout, str, "status%u", no);
nv_add_uint64(nvout, nv_get_uint64(cnvin, "dirty"), "dirty%u", no);
nv_add_uint32(nvout, nv_get_uint32(cnvin, "extentsize"),
"extentsize%u", no);
nv_add_uint32(nvout, nv_get_uint32(cnvin, "keepdirty"),
"keepdirty%u", no);
end:
if (cnvin != NULL)
nv_free(cnvin);
if (cnvout != NULL)
nv_free(cnvout);
if (error != 0)
nv_add_int16(nvout, error, "error");
}
/*
 * Report the configuration and state of a single resource in nvout.
 *
 * The resource is either given directly (res != NULL) or looked up by name
 * in cfg; an unknown name is reported as EHAST_NOENTRY under "error<no>".
 * Static settings (provider name, local path, remote address, replication
 * mode, role) are always reported.  If the resource is primary or secondary
 * and has a worker process, the worker is additionally queried through
 * control_status_worker().
 */
static void
control_status(struct hastd_config *cfg, struct nv *nvout,
    struct hast_resource *res, const char *name, unsigned int no)
{
	const char *replication;

	assert(cfg != NULL);
	assert(nvout != NULL);
	assert(name != NULL);

	/* Name is always needed. */
	nv_add_string(nvout, name, "resource%u", no);

	if (res == NULL) {
		struct hast_resource *cur;

		TAILQ_FOREACH(cur, &cfg->hc_resources, hr_next) {
			if (strcmp(cur->hr_name, name) == 0) {
				res = cur;
				break;
			}
		}
		if (res == NULL) {
			nv_add_int16(nvout, EHAST_NOENTRY, "error%u", no);
			return;
		}
	}
	assert(res != NULL);

	nv_add_string(nvout, res->hr_provname, "provname%u", no);
	nv_add_string(nvout, res->hr_localpath, "localpath%u", no);
	nv_add_string(nvout, res->hr_remoteaddr, "remoteaddr%u", no);

	if (res->hr_replication == HAST_REPLICATION_FULLSYNC)
		replication = "fullsync";
	else if (res->hr_replication == HAST_REPLICATION_MEMSYNC)
		replication = "memsync";
	else if (res->hr_replication == HAST_REPLICATION_ASYNC)
		replication = "async";
	else
		replication = "unknown";
	nv_add_string(nvout, replication, "replication%u", no);

	nv_add_string(nvout, role2str(res->hr_role), "role%u", no);

	/* A primary resource is expected to always have a worker. */
	if (res->hr_role == HAST_ROLE_PRIMARY)
		assert(res->hr_workerpid != 0);

	/* Only primary and secondary roles can have a worker to query. */
	if (res->hr_role != HAST_ROLE_PRIMARY &&
	    res->hr_role != HAST_ROLE_SECONDARY)
		return;
	if (res->hr_workerpid == 0)
		return;

	/*
	 * If we are here, it means that we have a worker process, which we
	 * want to ask some questions.
	 */
	control_status_worker(res, nvout, no);
}
/*
 * Accept one connection on the control socket and serve a single request.
 *
 * The request header must carry a non-zero "cmd" field and a "resource0"
 * field.  If "resource0" is "all", the command is applied to every
 * configured resource; otherwise it is applied to "resource0", "resource1",
 * ... until the first missing name.  Results are collected in one reply
 * header, which is sent back before the connection is closed.
 */
void
control_handle(struct hastd_config *cfg)
{
struct proto_conn *conn;
struct nv *nvin, *nvout;
unsigned int ii;
const char *str;
uint8_t cmd, role;
int error;
if (proto_accept(cfg->hc_controlconn, &conn) < 0) {
pjdlog_errno(LOG_ERR, "Unable to accept control connection");
return;
}
nvin = nvout = NULL;
role = HAST_ROLE_UNDEF;
if (hast_proto_recv_hdr(conn, &nvin) < 0) {
pjdlog_errno(LOG_ERR, "Unable to receive control header");
nvin = NULL;
goto close;
}
/* Obtain command code. 0 means that nv_get_uint8() failed. */
cmd = nv_get_uint8(nvin, "cmd");
if (cmd == 0) {
pjdlog_error("Control header is missing 'cmd' field.");
error = EHAST_INVALID;
/* No reply header exists yet, so nothing is sent back. */
goto close;
}
/* Allocate outgoing nv structure. */
nvout = nv_alloc();
if (nvout == NULL) {
pjdlog_error("Unable to allocate header for control response.");
error = EHAST_NOMEMORY;
goto close;
}
error = 0;
str = nv_get_string(nvin, "resource0");
if (str == NULL) {
pjdlog_error("Control header is missing 'resource0' field.");
error = EHAST_INVALID;
goto fail;
}
/* The "role" field is only read and validated for HASTCTL_SET_ROLE. */
if (cmd == HASTCTL_SET_ROLE) {
role = nv_get_uint8(nvin, "role");
switch (role) {
case HAST_ROLE_INIT: /* Is that valid to set, hmm? */
case HAST_ROLE_PRIMARY:
case HAST_ROLE_SECONDARY:
break;
default:
pjdlog_error("Invalid role received (%hhu).", role);
error = EHAST_INVALID;
goto fail;
}
}
if (strcmp(str, "all") == 0) {
struct hast_resource *res;
/* All configured resources. */
ii = 0;
TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
switch (cmd) {
case HASTCTL_SET_ROLE:
control_set_role(cfg, nvout, role, res,
res->hr_name, ii++);
break;
case HASTCTL_STATUS:
control_status(cfg, nvout, res, res->hr_name,
ii++);
break;
default:
pjdlog_error("Invalid command received (%hhu).",
cmd);
error = EHAST_UNIMPLEMENTED;
goto fail;
}
}
} else {
/* Only selected resources. */
for (ii = 0; ; ii++) {
/* The list of names ends at the first missing "resource<ii>". */
str = nv_get_string(nvin, "resource%u", ii);
if (str == NULL)
break;
switch (cmd) {
case HASTCTL_SET_ROLE:
control_set_role(cfg, nvout, role, NULL, str,
ii);
break;
case HASTCTL_STATUS:
control_status(cfg, nvout, NULL, str, ii);
break;
default:
pjdlog_error("Invalid command received (%hhu).",
cmd);
error = EHAST_UNIMPLEMENTED;
goto fail;
}
}
}
/* If building the reply failed, close without sending anything. */
if (nv_error(nvout) != 0)
goto close;
fail:
/* Send the reply, including the global error code if one was set. */
if (error != 0)
nv_add_int16(nvout, error, "error");
if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0)
pjdlog_errno(LOG_ERR, "Unable to send control response");
close:
/* Release both headers (if allocated) and the connection. */
if (nvin != NULL)
nv_free(nvin);
if (nvout != NULL)
nv_free(nvout);
proto_close(conn);
}
/*
* Thread handles control requests from the parent.
*/
void *
ctrl_thread(void *arg)
{
struct hast_resource *res = arg;
struct nv *nvin, *nvout;
uint8_t cmd;
for (;;) {
if (hast_proto_recv_hdr(res->hr_ctrl, &nvin) < 0) {
if (sigexit_received)
pthread_exit(NULL);
pjdlog_errno(LOG_ERR,
"Unable to receive control message");
continue;
}
cmd = nv_get_uint8(nvin, "cmd");
if (cmd == 0) {
pjdlog_error("Control message is missing 'cmd' field.");
nv_free(nvin);
continue;
}
nv_free(nvin);
nvout = nv_alloc();
switch (cmd) {
case HASTCTL_STATUS:
if (res->hr_remotein != NULL &&
res->hr_remoteout != NULL) {
nv_add_string(nvout, "complete", "status");
} else {
nv_add_string(nvout, "degraded", "status");
}
nv_add_uint32(nvout, (uint32_t)res->hr_extentsize,
"extentsize");
if (res->hr_role == HAST_ROLE_PRIMARY) {
nv_add_uint32(nvout,
(uint32_t)res->hr_keepdirty, "keepdirty");
nv_add_uint64(nvout,
(uint64_t)(activemap_ndirty(res->hr_amp) *
res->hr_extentsize), "dirty");
} else {
nv_add_uint32(nvout, (uint32_t)0, "keepdirty");
nv_add_uint64(nvout, (uint64_t)0, "dirty");
}
break;
default:
nv_add_int16(nvout, EINVAL, "error");
break;
}
if (nv_error(nvout) != 0) {
pjdlog_error("Unable to create answer on control message.");
nv_free(nvout);
continue;
}
if (hast_proto_send(NULL, res->hr_ctrl, nvout, NULL, 0) < 0) {
pjdlog_errno(LOG_ERR,
"Unable to send reply to control message");
}
nv_free(nvout);
}
/* NOTREACHED */
return (NULL);
}

44
sbin/hastd/control.h Normal file
View File

@ -0,0 +1,44 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _CONTROL_H_
#define _CONTROL_H_
/*
 * Command codes carried in the "cmd" field of a control header.
 * Zero is not used as a command: the receivers treat a zero "cmd" as
 * a missing field.
 */
#define HASTCTL_SET_ROLE 1
#define HASTCTL_STATUS 2
struct hastd_config;
/* Accept and serve one request on the control connection of cfg. */
void control_handle(struct hastd_config *cfg);
/* Thread entry point; arg is a pointer to struct hast_resource. */
void *ctrl_thread(void *arg);
#endif /* !_CONTROL_H_ */

252
sbin/hastd/ebuf.c Normal file
View File

@ -0,0 +1,252 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <strings.h>
#include <unistd.h>
#include "ebuf.h"
/* Value stored in eb_magic while the structure is valid. */
#define EBUF_MAGIC 0xeb0f41c
/*
 * Extensible buffer: one allocation [eb_start, eb_end) in which the real
 * data occupies eb_size bytes starting at eb_used, leaving free room both
 * in front of and behind the data.
 */
struct ebuf {
/* Magic to assert the caller uses valid structure. */
int eb_magic;
/* Address where we did the allocation. */
unsigned char *eb_start;
/* Allocation end address. */
unsigned char *eb_end;
/* Start of real data. */
unsigned char *eb_used;
/* Size of real data. */
size_t eb_size;
};
/* Grow the free room in front of / behind the data; 0 on success, -1 on failure. */
static int ebuf_head_extent(struct ebuf *eb, size_t size);
static int ebuf_tail_extent(struct ebuf *eb, size_t size);
/*
 * Allocate an extensible buffer able to hold at least size bytes.
 *
 * PAGE_SIZE extra bytes are allocated, and the data start is placed
 * PAGE_SIZE / 4 bytes into the allocation so that data can later be added at
 * the front as well.  Returns NULL on allocation failure, with errno from the
 * failed malloc() preserved.
 */
struct ebuf *
ebuf_alloc(size_t size)
{
	struct ebuf *eb;
	unsigned char *buf;
	size_t total;
	int saved_errno;

	eb = malloc(sizeof(*eb));
	if (eb == NULL)
		return (NULL);

	total = size + PAGE_SIZE;
	buf = malloc(total);
	if (buf == NULL) {
		/* free() must not clobber the errno set by malloc(). */
		saved_errno = errno;
		free(eb);
		errno = saved_errno;
		return (NULL);
	}

	eb->eb_start = buf;
	eb->eb_end = buf + total;
	/* Leave headroom so data can be prepended. */
	eb->eb_used = buf + PAGE_SIZE / 4;
	eb->eb_size = 0;
	eb->eb_magic = EBUF_MAGIC;

	return (eb);
}
/*
 * Release the buffer and its descriptor.  The magic is cleared first so that
 * later use of the stale descriptor trips the assertions.
 */
void
ebuf_free(struct ebuf *eb)
{
	unsigned char *buf;

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	buf = eb->eb_start;
	eb->eb_magic = 0;

	free(buf);
	free(eb);
}
/*
 * Prepend size bytes to the buffer, extending it when there is not enough
 * free room in front of the data.  If data is NULL the space is only
 * reserved.  Returns 0 on success and -1 if the buffer could not be
 * extended.
 */
int
ebuf_add_head(struct ebuf *eb, const void *data, size_t size)
{
	size_t headroom;

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	headroom = (size_t)(eb->eb_used - eb->eb_start);
	if (headroom < size && ebuf_head_extent(eb, size) < 0) {
		/* No room at the front and the buffer cannot grow. */
		return (-1);
	}
	assert(size <= (size_t)(eb->eb_used - eb->eb_start));

	eb->eb_used -= size;
	eb->eb_size += size;

	/* A NULL data pointer means the caller only reserves the space. */
	if (data != NULL)
		bcopy(data, eb->eb_used, size);

	return (0);
}
/*
 * Append size bytes to the buffer, extending it when there is not enough
 * free room behind the data.  If data is NULL the space is only reserved.
 * Returns 0 on success and -1 if the buffer could not be extended.
 */
int
ebuf_add_tail(struct ebuf *eb, const void *data, size_t size)
{
	size_t tailroom;

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	tailroom = (size_t)(eb->eb_end - (eb->eb_used + eb->eb_size));
	if (tailroom < size && ebuf_tail_extent(eb, size) < 0) {
		/* No room at the back and the buffer cannot grow. */
		return (-1);
	}
	assert(size <= (size_t)(eb->eb_end - (eb->eb_used + eb->eb_size)));

	/* A NULL data pointer means the caller only reserves the space. */
	if (data != NULL)
		bcopy(data, eb->eb_used + eb->eb_size, size);
	eb->eb_size += size;

	return (0);
}
/*
 * Drop size bytes from the front of the data.  No memory is released; the
 * data start simply moves forward.
 */
void
ebuf_del_head(struct ebuf *eb, size_t size)
{

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);
	assert(size <= eb->eb_size);

	eb->eb_size -= size;
	eb->eb_used += size;
}
/*
 * Drop size bytes from the end of the data.  No memory is released; only the
 * recorded data size shrinks.
 */
void
ebuf_del_tail(struct ebuf *eb, size_t size)
{

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);
	assert(size <= eb->eb_size);

	eb->eb_size = eb->eb_size - size;
}
/*
 * Return a pointer to the data, or NULL when the buffer holds no data.
 * If sizep is not NULL it receives the data size.
 */
void *
ebuf_data(struct ebuf *eb, size_t *sizep)
{

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	if (sizep != NULL)
		*sizep = eb->eb_size;

	if (eb->eb_size == 0)
		return (NULL);
	return (eb->eb_used);
}
/*
 * Return the number of data bytes currently held in the buffer.
 */
size_t
ebuf_size(struct ebuf *eb)
{
	size_t datasize;

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	datasize = eb->eb_size;
	return (datasize);
}
/*
 * Function adds size + (PAGE_SIZE / 4) bytes at the front of the buffer.
 *
 * Room at the front cannot be gained with realloc(), so a new, larger block
 * is allocated, the existing data is copied to its new position and the old
 * block is released.  Returns 0 on success; on allocation failure returns -1
 * and leaves the buffer untouched.
 */
static int
ebuf_head_extent(struct ebuf *eb, size_t size)
{
	unsigned char *newstart, *newused;
	size_t newsize;

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	newsize = eb->eb_end - eb->eb_start + (PAGE_SIZE / 4) + size;

	newstart = malloc(newsize);
	if (newstart == NULL)
		return (-1);
	/* Shift the data start by exactly the amount of room added. */
	newused =
	    newstart + (PAGE_SIZE / 4) + size + (eb->eb_used - eb->eb_start);

	bcopy(eb->eb_used, newused, eb->eb_size);

	/* The old block is no longer referenced; release it to avoid a leak. */
	free(eb->eb_start);

	eb->eb_start = newstart;
	eb->eb_used = newused;
	eb->eb_end = newstart + newsize;

	return (0);
}
/*
 * Function adds size + ((3 * PAGE_SIZE) / 4) bytes at the back.
 *
 * The block is grown with realloc(), which may move it, so the position of
 * the data is remembered as an offset and re-applied to the new block.
 * Returns 0 on success; on allocation failure returns -1 and leaves the
 * buffer untouched.
 */
static int
ebuf_tail_extent(struct ebuf *eb, size_t size)
{
	unsigned char *newstart;
	size_t newsize, usedoff;

	assert(eb != NULL && eb->eb_magic == EBUF_MAGIC);

	newsize = eb->eb_end - eb->eb_start + size + ((3 * PAGE_SIZE) / 4);

	/*
	 * Compute the data offset before realloc(): once the old block may
	 * have been freed, its pointers must not be used in arithmetic.
	 */
	usedoff = (size_t)(eb->eb_used - eb->eb_start);

	newstart = realloc(eb->eb_start, newsize);
	if (newstart == NULL)
		return (-1);

	eb->eb_start = newstart;
	eb->eb_used = newstart + usedoff;
	eb->eb_end = newstart + newsize;

	return (0);
}

51
sbin/hastd/ebuf.h Normal file
View File

@ -0,0 +1,51 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _EBUF_H_
#define _EBUF_H_
#include <stdlib.h> /* size_t */
/* Opaque extensible buffer; the structure is private to ebuf.c. */
struct ebuf;
/* Allocate a buffer for at least size bytes; NULL on failure. */
struct ebuf *ebuf_alloc(size_t size);
/* Release the buffer and its descriptor. */
void ebuf_free(struct ebuf *eb);
/*
 * Add size bytes at the front / at the back of the data.  A NULL data
 * pointer only reserves the space.  Return 0 on success, -1 on failure.
 */
int ebuf_add_head(struct ebuf *eb, const void *data, size_t size);
int ebuf_add_tail(struct ebuf *eb, const void *data, size_t size);
/* Remove size bytes from the front / from the back of the data. */
void ebuf_del_head(struct ebuf *eb, size_t size);
void ebuf_del_tail(struct ebuf *eb, size_t size);
/* Pointer to the data (NULL if empty); *sizep, if not NULL, gets its size. */
void *ebuf_data(struct ebuf *eb, size_t *sizep);
/* Size of the data in bytes. */
size_t ebuf_size(struct ebuf *eb);
#endif /* !_EBUF_H_ */

267
sbin/hastd/hast.conf.5 Normal file
View File

@ -0,0 +1,267 @@
.\" Copyright (c) 2010 The FreeBSD Foundation
.\" All rights reserved.
.\"
.\" This software was developed by Pawel Jakub Dawidek under sponsorship from
.\" the FreeBSD Foundation.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd February 1, 2010
.Dt HAST.CONF 5
.Os
.Sh NAME
.Nm hast.conf
.Nd configuration file for the
.Xr hastd 8
daemon and the
.Xr hastctl 8
utility.
.Sh DESCRIPTION
The
.Nm
file is used by both
.Xr hastd 8
daemon
and
.Xr hastctl 8
control utility.
Configuration file is designed in a way that exactly the same file can be
(and should be) used on both HAST nodes.
Every line starting with # is treated as comment and ignored.
.Sh CONFIGURATION FILE SYNTAX
General syntax of the
.Nm
file is as follows:
.Bd -literal -offset indent
# Global section
control <addr>
listen <addr>
replication <mode>
on <node> {
# Node section
control <addr>
listen <addr>
}
on <node> {
# Node section
control <addr>
listen <addr>
}
resource <name> {
# Resource section
replication <mode>
name <name>
local <path>
on <node> {
# Resource-node section
name <name>
# Required
local <path>
# Required
remote <addr>
}
on <node> {
# Resource-node section
name <name>
# Required
local <path>
# Required
remote <addr>
}
}
.Ed
.Pp
Most of the various available configuration parameters are optional.
If parameter is not defined in the particular section, it will be
inherited from the parent section.
For example, if the
.Ic listen
parameter is not defined in the node section, it will be inherited from
the global section.
In case the global section does not define the
.Ic listen
parameter at all, the default value will be used.
.Sh CONFIGURATION FILE DESCRIPTION
The
.Aq node
argument can be replaced either by a full hostname as obtained by
.Xr gethostname 3 ,
only first part of the hostname, or by node's UUID as found in the
.Va kern.hostuuid
.Xr sysctl 8
variable.
.Pp
The following statements are available:
.Bl -tag -width ".Ic xxxx"
.It Ic control Aq addr
.Pp
Address for communication with
.Xr hastctl 8 .
Each of the following examples defines the same control address:
.Bd -literal -offset indent
uds:///var/run/hastctl
unix:///var/run/hastctl
/var/run/hastctl
.Ed
.Pp
The default value is
.Pa uds:///var/run/hastctl .
.It Ic listen Aq addr
.Pp
Address to listen on in form of:
.Bd -literal -offset indent
protocol://protocol-specific-address
.Ed
.Pp
Each of the following examples defines the same listen address:
.Bd -literal -offset indent
0.0.0.0
0.0.0.0:8457
tcp://0.0.0.0
tcp://0.0.0.0:8457
tcp4://0.0.0.0
tcp4://0.0.0.0:8457
.Ed
.Pp
The default value is
.Pa tcp4://0.0.0.0:8457 .
.It Ic replication Aq mode
.Pp
Replication mode should be one of the following:
.Bl -tag -width ".Ic xxxx"
.It Ic memsync
.Pp
Report the write operation as completed when local write completes and
when the remote node acknowledges the data receipt, but before it
actually stores the data.
The data on remote node will be stored directly after sending
acknowledgement.
This mode is intended to reduce latency, but still provides very good
reliability.
The only situation where some small amount of data could be lost is when
the data is stored on primary node and sent to the secondary.
Secondary node then acknowledges data receipt and primary reports
success to an application.
However, it may happen that the secondary goes down before the received
data is really stored locally.
Before secondary node returns, primary node dies entirely.
When the secondary node comes back to life it becomes the new primary.
Unfortunately some small amount of data which was confirmed to be stored
to the application was lost.
The risk of such a situation is very small, which is the reason for this
mode to be the default.
.It Ic fullsync
.Pp
Mark the write operation as completed when local as well as remote
write completes.
This is the safest and the slowest replication mode.
The
.Ic fullsync
replication mode is currently not implemented.
.It Ic async
.Pp
The write operation is reported as complete right after the local write
completes.
This is the fastest and the most dangerous replication mode.
This mode should be used when replicating to a distant node where
latency is too high for other modes.
The
.Ic async
replication mode is currently not implemented.
.El
.It Ic name Aq name
.Pp
GEOM provider name that will appear as
.Pa /dev/hast/<name> .
If name is not defined, resource name will be used as provider name.
.It Ic local Aq path
.Pp
Path to the local component which will be used as backend provider for
the resource.
This can be either GEOM provider or regular file.
.It Ic remote Aq addr
.Pp
Address of the remote
.Nm hastd
daemon.
Format is the same as for the
.Ic listen
statement.
When operating as a primary node this address will be used to connect to
the secondary node.
When operating as a secondary node only connections from this address
will be accepted.
.El
.Sh EXAMPLES
The example configuration file can look as follows:
.Bd -literal -offset indent
resource shared {
local /dev/da0
on hasta {
remote tcp4://10.0.0.2
}
on hastb {
remote tcp4://10.0.0.1
}
}
resource tank {
on hasta {
local /dev/mirror/tanka
remote tcp4://10.0.0.2
}
on hastb {
local /dev/mirror/tankb
remote tcp4://10.0.0.1
}
}
.Ed
.Sh FILES
.Bl -tag -width ".Pa /var/run/hastctl" -compact
.It Pa /etc/hast.conf
The default
.Nm
configuration file.
.It Pa /var/run/hastctl
Control socket used by the
.Xr hastctl 8
control utility to communicate with the
.Xr hastd 8
daemon.
.El
.Sh SEE ALSO
.Xr gethostname 3 ,
.Xr geom 4 ,
.Xr hastctl 8 ,
.Xr hastd 8 .
.Sh AUTHORS
The
.Nm
was written by
.An Pawel Jakub Dawidek Aq pjd@FreeBSD.org
under sponsorship of the FreeBSD Foundation.

190
sbin/hastd/hast.h Normal file
View File

@ -0,0 +1,190 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _HAST_H_
#define _HAST_H_
#include <sys/queue.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <limits.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <activemap.h>
#include "proto.h"
/* Version number carried in the main header of every protocol message. */
#define HAST_PROTO_VERSION 0
/*
 * EHAST_* status codes.
 * NOTE(review): presumably the result codes reported back to hastctl(8);
 * confirm against the control code.
 */
#define EHAST_OK 0
#define EHAST_NOENTRY 1
#define EHAST_INVALID 2
#define EHAST_NOMEMORY 3
#define EHAST_UNIMPLEMENTED 4
/*
 * HASTCTL_CMD_* command identifiers.
 * NOTE(review): presumably the commands hastctl(8) sends over the control
 * connection; confirm against the control code.
 */
#define HASTCTL_CMD_UNKNOWN 0
#define HASTCTL_CMD_SETROLE 1
#define HASTCTL_CMD_STATUS 2
/* Resource roles, as stored in hr_role and hr_previous_role. */
#define HAST_ROLE_UNDEF 0
#define HAST_ROLE_INIT 1
#define HAST_ROLE_PRIMARY 2
#define HAST_ROLE_SECONDARY 3
/* Synchronization source, as stored in hr_syncsrc. */
#define HAST_SYNCSRC_UNDEF 0
#define HAST_SYNCSRC_PRIMARY 1
#define HAST_SYNCSRC_SECONDARY 2
/*
 * HIO_* I/O request types.
 * NOTE(review): presumably these correspond to BIO_READ, BIO_WRITE,
 * BIO_DELETE and BIO_FLUSH; confirm against the worker code.
 */
#define HIO_UNDEF 0
#define HIO_READ 1
#define HIO_WRITE 2
#define HIO_DELETE 3
#define HIO_FLUSH 4
/* Default location of the configuration file. */
#define HAST_CONFIG "/etc/hast.conf"
/* Default path of the control socket used to talk to hastctl(8). */
#define HAST_CONTROL "/var/run/hastctl"
/* Default TCP port; the same number appears in HASTD_LISTEN below. */
#define HASTD_PORT 8457
/* Default address to listen on. */
#define HASTD_LISTEN "tcp4://0.0.0.0:8457"
/* Default location of the PID file. */
#define HASTD_PIDFILE "/var/run/hastd.pid"
/* Default extent size (2097152 = 2 * 1024 * 1024). */
#define HAST_EXTENTSIZE 2097152
/* Default maximum number of extents that are kept dirty. */
#define HAST_KEEPDIRTY 64
/* Size of the buffers that hold address strings (hc_*addr, hr_remoteaddr). */
#define HAST_ADDRSIZE 1024
/* Size in bytes of the hr_token array. */
#define HAST_TOKEN_SIZE 16
/*
 * Daemon-wide configuration: the control and listen endpoints plus the list
 * of configured resources.  Returned by yy_config_parse() and released with
 * yy_config_free().
 */
struct hastd_config {
/* Address to communicate with hastctl(8). */
char hc_controladdr[HAST_ADDRSIZE];
/* Protocol-specific data. */
struct proto_conn *hc_controlconn;
/* Address to listen on. */
char hc_listenaddr[HAST_ADDRSIZE];
/* Protocol-specific data. */
struct proto_conn *hc_listenconn;
/* List of resources (linked through hast_resource.hr_next). */
TAILQ_HEAD(, hast_resource) hc_resources;
};
/* Replication modes, as stored in hast_resource.hr_replication. */
#define HAST_REPLICATION_FULLSYNC 0
#define HAST_REPLICATION_MEMSYNC 1
#define HAST_REPLICATION_ASYNC 2
/*
 * Structure that describes single resource.
 *
 * Instances are linked into hastd_config.hc_resources through hr_next.
 */
struct hast_resource {
/* Resource name. */
char hr_name[NAME_MAX];
/* Replication mode (HAST_REPLICATION_*). */
int hr_replication;
/* Provider name that will appear in /dev/hast/. */
char hr_provname[NAME_MAX];
/* Synchronization extent size. */
int hr_extentsize;
/* Maximum number of extents that are kept dirty. */
int hr_keepdirty;
/* Path to local component. */
char hr_localpath[PATH_MAX];
/* Descriptor to access local component. */
int hr_localfd;
/* Offset into local component. */
off_t hr_localoff;
/* Size of usable space. */
off_t hr_datasize;
/* Size of entire local provider. */
off_t hr_local_mediasize;
/* Sector size of local provider. */
unsigned int hr_local_sectorsize;
/* Descriptor for /dev/ggctl communication. */
int hr_ggatefd;
/* Unit number for ggate communication. */
int hr_ggateunit;
/* Address of the remote component. */
char hr_remoteaddr[HAST_ADDRSIZE];
/* Connection for incoming data. */
struct proto_conn *hr_remotein;
/* Connection for outgoing data. */
struct proto_conn *hr_remoteout;
/* Token to verify that both the in and the out connection are coming
from the same node (not necessarily from the same address). */
unsigned char hr_token[HAST_TOKEN_SIZE];
/* Resource unique identifier. */
uint64_t hr_resuid;
/* Primary's local modification count. */
uint64_t hr_primary_localcnt;
/* Primary's remote modification count. */
uint64_t hr_primary_remotecnt;
/* Secondary's local modification count. */
uint64_t hr_secondary_localcnt;
/* Secondary's remote modification count. */
uint64_t hr_secondary_remotecnt;
/* Synchronization source (HAST_SYNCSRC_*). */
uint8_t hr_syncsrc;
/* Resource role: HAST_ROLE_{INIT,PRIMARY,SECONDARY}. */
int hr_role;
/* Previous resource role: HAST_ROLE_{INIT,PRIMARY,SECONDARY}. */
int hr_previous_role;
/* PID of child worker process. 0 - no child. */
pid_t hr_workerpid;
/* Control connection between parent and child. */
struct proto_conn *hr_ctrl;
/* Activemap structure. */
struct activemap *hr_amp;
/* Lock used to synchronize access to hr_amp. */
pthread_mutex_t hr_amp_lock;
/* Next resource. */
TAILQ_ENTRY(hast_resource) hr_next;
};
/*
 * Configuration parser interface.
 * NOTE(review): 'config' is presumably the path of the configuration file
 * (cf. HAST_CONFIG) -- confirm against the parser source.
 */
struct hastd_config *yy_config_parse(const char *config);
/* Release a configuration returned by yy_config_parse(). */
void yy_config_free(struct hastd_config *config);
/*
 * The yy* names below follow the usual yacc/lex naming; presumably they are
 * provided by, or required by, the generated parser and scanner -- confirm.
 */
void yyerror(const char *);
int yylex(void);
int yyparse(void);
#endif /* !_HAST_H_ */

401
sbin/hastd/hast_proto.c Normal file
View File

@ -0,0 +1,401 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/endian.h>
#include <assert.h>
#include <errno.h>
#include <string.h>
#include <strings.h>
#include <openssl/sha.h>
#include <hast.h>
#include <ebuf.h>
#include <nv.h>
#include <pjdlog.h>
#include <proto.h>
#include "hast_proto.h"
/*
 * Fixed-size header that precedes every message on the wire.
 * hast_proto_send() fills it in and places it in front of the serialized nv
 * headers; hast_proto_recv_hdr() reads it first to learn how many header
 * bytes follow.  The structure is packed, so no padding is inserted between
 * the fields.
 */
struct hast_main_header {
/* Protocol version. */
uint8_t version;
/* Size of nv headers (converted with htole32()/le32toh() by the users). */
uint32_t size;
} __packed;
/*
 * Signatures of the per-stage transforms.  A stage receives the resource,
 * the nv headers, pointers to the current data pointer and data size, and a
 * flag telling whether the current data buffer has to be freed.  The stages
 * in this file return 0 on success and -1 on failure, and may replace
 * *datap / *sizep and set the flag to true when they allocate a new buffer.
 */
typedef int hps_send_t(struct hast_resource *, struct nv *nv, void **, size_t *, bool *);
typedef int hps_recv_t(struct hast_resource *, struct nv *nv, void **, size_t *, bool *);
/* One stage of the data pipeline: a name plus its send and receive hooks. */
struct hast_pipe_stage {
const char *hps_name;
hps_send_t *hps_send;
hps_recv_t *hps_recv;
};
/* Forward declarations of the stage hooks referenced by pipeline[] below. */
static int compression_send(struct hast_resource *res, struct nv *nv,
void **datap, size_t *sizep, bool *freedatap);
static int compression_recv(struct hast_resource *res, struct nv *nv,
void **datap, size_t *sizep, bool *freedatap);
static int checksum_send(struct hast_resource *res, struct nv *nv,
void **datap, size_t *sizep, bool *freedatap);
static int checksum_recv(struct hast_resource *res, struct nv *nv,
void **datap, size_t *sizep, bool *freedatap);
/*
 * Data pipeline.  hast_proto_send() walks the stages from first to last and
 * hast_proto_recv_data() walks them from last to first; in both functions
 * the loop currently sits inside an "if (false)" block, so no stage is
 * actually run.
 */
static struct hast_pipe_stage pipeline[] = {
{ "compression", compression_send, compression_recv },
{ "checksum", checksum_send, checksum_recv }
};
/*
 * Send-side "compression" stage (placeholder).
 *
 * TODO: no real compression is done yet, it is only emulated.  With 80%
 * probability the stage pretends to succeed: the payload is copied into a
 * newly allocated buffer behind a 32-bit little-endian copy of its original
 * length, the "compression" header is set to "null", *datap and *sizep are
 * switched to the new buffer and *freedatap is set to true.  Otherwise the
 * payload is passed through untouched, as compression is not required to
 * succeed.
 *
 * Returns 0 on success.  Returns -1 when the buffer cannot be allocated, or
 * when the header cannot be added (errno is then taken from nv_error()).
 */
static int
compression_send(struct hast_resource *res, struct nv *nv, void **datap,
    size_t *sizep, bool *freedatap)
{
	unsigned char *packed;
	uint32_t *lenp;

	(void)res;	/* TODO */

	if (arc4random_uniform(100) >= 80) {
		/* "Compression failed": leave everything as it was. */
		return (0);
	}

	/*
	 * "Compression succeeded" (but the data grows by 4 bytes instead of
	 * shrinking for now).
	 */
	packed = malloc(sizeof(uint32_t) + *sizep);
	if (packed == NULL)
		return (-1);
	lenp = (void *)packed;
	*lenp = htole32((uint32_t)*sizep);

	nv_add_string(nv, "null", "compression");
	if (nv_error(nv) != 0) {
		free(packed);
		errno = nv_error(nv);
		return (-1);
	}

	bcopy(*datap, packed + sizeof(uint32_t), *sizep);
	/* Release the previous buffer only if an earlier stage allocated it. */
	if (*freedatap)
		free(*datap);
	*freedatap = true;
	*datap = packed;
	*sizep = sizeof(uint32_t) + *sizep;

	return (0);
}
/*
 * Receive-side "compression" stage (placeholder).
 *
 * TODO: for now compression is only emulated.  When the "compression" header
 * is absent the data is passed through.  For the "null" algorithm the data
 * consists of a 32-bit little-endian original length followed by the
 * payload; the payload is copied into a newly allocated buffer, *datap and
 * *sizep are switched to it and *freedatap is set to true.
 *
 * The length prefix arrives from the peer, so it is validated against the
 * amount of data actually received before anything is copied.
 *
 * Returns 0 on success.  Returns -1 for an unknown algorithm, for malformed
 * data (errno is set to EINVAL) or when the buffer cannot be allocated.
 */
static int
compression_recv(struct hast_resource *res, struct nv *nv, void **datap,
    size_t *sizep, bool *freedatap)
{
	unsigned char *newbuf;
	const char *algo;
	uint32_t lesize;
	size_t origsize;

	(void)res;	/* TODO */

	algo = nv_get_string(nv, "compression");
	if (algo == NULL)
		return (0);	/* No compression. */
	if (strcmp(algo, "null") != 0) {
		pjdlog_error("Unknown compression algorithm '%s'.", algo);
		return (-1);	/* Unknown compression algorithm. */
	}

	/* There has to be room for the length prefix itself. */
	if (*sizep < sizeof(lesize)) {
		pjdlog_error("Compressed data too short (%zu bytes).", *sizep);
		errno = EINVAL;
		return (-1);
	}
	/* Copy the prefix out: the data pointer need not be aligned. */
	memcpy(&lesize, *datap, sizeof(lesize));
	origsize = le32toh(lesize);
	/* Never read more payload than was actually received. */
	if (origsize > *sizep - sizeof(lesize)) {
		pjdlog_error("Invalid original size (%zu) for %zu bytes of data.",
		    origsize, *sizep);
		errno = EINVAL;
		return (-1);
	}

	/* malloc(0) may return NULL, which must not be taken for a failure. */
	newbuf = malloc(origsize > 0 ? origsize : 1);
	if (newbuf == NULL)
		return (-1);
	memcpy(newbuf, (unsigned char *)*datap + sizeof(lesize), origsize);
	/* Release the previous buffer only if an earlier stage allocated it. */
	if (*freedatap)
		free(*datap);
	*freedatap = true;
	*datap = newbuf;
	*sizep = origsize;
	return (0);
}
/*
 * Send-side checksum stage.
 *
 * Computes the SHA256 digest of the *sizep bytes at *datap and records it in
 * the nv headers: "checksum" is set to "sha256" and "hash" to the digest.
 * The data itself is not modified.  Always returns 0.
 */
static int
checksum_send(struct hast_resource *res, struct nv *nv, void **datap,
    size_t *sizep, bool *freedatap __unused)
{
	SHA256_CTX sha;
	unsigned char digest[SHA256_DIGEST_LENGTH];

	(void)res;	/* TODO */

	SHA256_Init(&sha);
	SHA256_Update(&sha, *datap, *sizep);
	SHA256_Final(digest, &sha);

	nv_add_string(nv, "sha256", "checksum");
	nv_add_uint8_array(nv, digest, sizeof(digest), "hash");

	return (0);
}
/*
 * Receive-side checksum stage.
 *
 * When the "checksum" header is absent the data is accepted as is.
 * Otherwise the algorithm has to be "sha256" and the "hash" header has to
 * hold a digest of the right size that matches the SHA256 digest of the
 * *sizep bytes at *datap.
 *
 * Returns 0 when there is nothing to verify or the digest matches, and -1
 * (after logging the reason) otherwise.
 */
static int
checksum_recv(struct hast_resource *res, struct nv *nv, void **datap,
    size_t *sizep, bool *freedatap __unused)
{
	unsigned char computed[SHA256_DIGEST_LENGTH];
	const unsigned char *received;
	const char *name;
	size_t rsize;
	SHA256_CTX sha;

	(void)res;	/* TODO */

	name = nv_get_string(nv, "checksum");
	if (name == NULL) {
		/* No checksum. */
		return (0);
	}
	if (strcmp(name, "sha256") != 0) {
		/* Unknown checksum algorithm. */
		pjdlog_error("Unknown checksum algorithm '%s'.", name);
		return (-1);
	}

	received = nv_get_uint8_array(nv, &rsize, "hash");
	if (received == NULL) {
		/* Hash not found. */
		pjdlog_error("Checksum algorithm is present, but hash is missing.");
		return (-1);
	}
	if (rsize != sizeof(computed)) {
		/* Different hash size. */
		pjdlog_error("Invalid hash size (%zu) for %s, should be %zu.",
		    rsize, name, sizeof(computed));
		return (-1);
	}

	SHA256_Init(&sha);
	SHA256_Update(&sha, *datap, *sizep);
	SHA256_Final(computed, &sha);

	if (bcmp(received, computed, sizeof(computed)) == 0)
		return (0);

	/* Hash mismatch. */
	pjdlog_error("Hash mismatch.");
	return (-1);
}
/*
* Send the given nv structure via conn.
* We keep headers in nv structure and pass data in separate argument.
* There can be no data at all (data is NULL then).
*/
int
hast_proto_send(struct hast_resource *res, struct proto_conn *conn,
struct nv *nv, const void *data, size_t size)
{
struct hast_main_header hdr;
struct ebuf *eb;
bool freedata;
void *dptr, *hptr;
size_t hsize;
int ret;
dptr = (void *)(uintptr_t)data;
freedata = false;
ret = -1;
if (data != NULL) {
if (false) {
unsigned int ii;
for (ii = 0; ii < sizeof(pipeline) / sizeof(pipeline[0]);
ii++) {
ret = pipeline[ii].hps_send(res, nv, &dptr, &size,
&freedata);
if (ret == -1)
goto end;
}
ret = -1;
}
nv_add_uint32(nv, size, "size");
if (nv_error(nv) != 0) {
errno = nv_error(nv);
goto end;
}
}
eb = nv_hton(nv);
if (eb == NULL)
goto end;
hdr.version = HAST_PROTO_VERSION;
hdr.size = htole32((uint32_t)ebuf_size(eb));
if (ebuf_add_head(eb, &hdr, sizeof(hdr)) < 0)
goto end;
hptr = ebuf_data(eb, &hsize);
if (proto_send(conn, hptr, hsize) < 0)
goto end;
if (data != NULL && proto_send(conn, dptr, size) < 0)
goto end;
ret = 0;
end:
if (freedata)
free(dptr);
return (ret);
}
int
hast_proto_recv_hdr(struct proto_conn *conn, struct nv **nvp)
{
struct hast_main_header hdr;
struct nv *nv;
struct ebuf *eb;
void *hptr;
eb = NULL;
nv = NULL;
if (proto_recv(conn, &hdr, sizeof(hdr)) < 0)
goto fail;
if (hdr.version != HAST_PROTO_VERSION) {
errno = ERPCMISMATCH;
goto fail;
}
hdr.size = le32toh(hdr.size);
eb = ebuf_alloc(hdr.size);
if (eb == NULL)
goto fail;
if (ebuf_add_tail(eb, NULL, hdr.size) < 0)
goto fail;
hptr = ebuf_data(eb, NULL);
assert(hptr != NULL);
if (proto_recv(conn, hptr, hdr.size) < 0)
goto fail;
nv = nv_ntoh(eb);
if (nv == NULL)
goto fail;
*nvp = nv;
return (0);
fail:
if (nv != NULL)
nv_free(nv);
else if (eb != NULL)
ebuf_free(eb);
return (-1);
}
/*
 * Receive the data part of a message whose headers (nv) were already read.
 *
 * The amount of data to read is taken from the "size" header, which is
 * supplied by the peer.  'data' is the caller's buffer and 'size' is its
 * capacity; a message announcing more data than fits is rejected with errno
 * set to EINVAL instead of being read past the end of the buffer.  When the
 * "size" header is 0 there is no data and the nv error state is reset.
 *
 * Returns 0 on success and -1 on failure.  Failures are logged, and errno is
 * preserved across the logging and cleanup.
 */
int
hast_proto_recv_data(struct hast_resource *res, struct proto_conn *conn,
    struct nv *nv, void *data, size_t size)
{
	unsigned int ii;
	bool freedata;
	size_t dsize;
	void *dptr;
	int ret, saved_errno;

	assert(data != NULL);
	assert(size > 0);

	ret = -1;
	freedata = false;
	dptr = data;

	dsize = nv_get_uint32(nv, "size");
	if (dsize > size) {
		/* The peer announced more data than the buffer can hold. */
		errno = EINVAL;
		goto end;
	}
	if (dsize == 0) {
		(void)nv_set_error(nv, 0);
	} else {
		if (proto_recv(conn, data, dsize) < 0)
			goto end;
		/* The pipeline is switched off for now. */
		if (false) {
			for (ii = sizeof(pipeline) / sizeof(pipeline[0]); ii > 0;
			    ii--) {
				assert(!"to be verified");
				ret = pipeline[ii - 1].hps_recv(res, nv, &dptr,
				    &dsize, &freedata);
				if (ret == -1)
					goto end;
			}
			ret = -1;
			/*
			 * NOTE(review): this check is kept exactly as it was,
			 * but it has to be revisited before the pipeline is
			 * enabled: a stage may change dsize, and the copy
			 * below must never exceed 'size'.
			 */
			if (dsize < size)
				goto end;
			/* TODO: 'size' doesn't seem right here. It is maximum data size. */
			if (dptr != data)
				bcopy(dptr, data, dsize);
		}
	}

	ret = 0;
end:
	/* Keep errno intact for the caller across logging and free(). */
	saved_errno = errno;
	if (ret < 0)
		pjdlog_error("%s: %s.", __func__, strerror(saved_errno));
	if (freedata)
		free(dptr);
	errno = saved_errno;
	return (ret);
}
/*
 * Receive a complete message: the headers and, when the "size" header is
 * non-zero, the data that follows them.
 *
 * On success the received headers are stored in *nvp.  On failure the
 * headers are freed, *nvp is left untouched and a negative value is
 * returned.
 */
int
hast_proto_recv(struct hast_resource *res, struct proto_conn *conn,
    struct nv **nvp, void *data, size_t size)
{
	struct nv *hdrs;
	int rc;

	rc = hast_proto_recv_hdr(conn, &hdrs);
	if (rc < 0)
		return (rc);

	if (nv_get_uint32(hdrs, "size") != 0) {
		rc = hast_proto_recv_data(res, conn, hdrs, data, size);
	} else {
		/* No data follows; reset the nv error state. */
		(void)nv_set_error(hdrs, 0);
	}

	if (rc < 0) {
		nv_free(hdrs);
		return (rc);
	}
	*nvp = hdrs;
	return (rc);
}

48
sbin/hastd/hast_proto.h Normal file
View File

@ -0,0 +1,48 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _HAST_PROTO_H_
#define _HAST_PROTO_H_
#include <stdlib.h> /* size_t */
#include <nv.h>
#include <proto.h>
/*
 * Forward declaration, so that this header does not depend on hast.h having
 * been included first: without it the 'struct hast_resource *' parameters
 * below would declare a new type local to each prototype.
 */
struct hast_resource;

/*
 * Send the headers in nv, followed by size bytes of data when data is not
 * NULL.  Returns 0 on success, -1 on failure.
 */
int hast_proto_send(struct hast_resource *res, struct proto_conn *conn,
    struct nv *nv, const void *data, size_t size);
/*
 * Receive a complete message: the headers are stored in *nvp and the data,
 * if any, in the caller's buffer.  Returns a negative value on failure.
 */
int hast_proto_recv(struct hast_resource *res, struct proto_conn *conn,
    struct nv **nvp, void *data, size_t size);
/*
 * Receive only the headers of a message and store them in *nvp.
 * Returns 0 on success, -1 on failure.
 */
int hast_proto_recv_hdr(struct proto_conn *conn, struct nv **nvp);
/*
 * Receive the data that follows headers obtained earlier with
 * hast_proto_recv_hdr().  Returns 0 on success, -1 on failure.
 */
int hast_proto_recv_data(struct hast_resource *res, struct proto_conn *conn,
    struct nv *nv, void *data, size_t size);
#endif /* !_HAST_PROTO_H_ */

232
sbin/hastd/hastd.8 Normal file
View File

@ -0,0 +1,232 @@
.\" Copyright (c) 2010 The FreeBSD Foundation
.\" All rights reserved.
.\"
.\" This software was developed by Pawel Jakub Dawidek under sponsorship from
.\" the FreeBSD Foundation.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd February 1, 2010
.Dt HASTD 8
.Os
.Sh NAME
.Nm hastd
.Nd "Highly Available Storage daemon"
.Sh SYNOPSIS
.Nm
.Op Fl dFh
.Op Fl c Ar config
.Op Fl P Ar pidfile
.Sh DESCRIPTION
The
.Nm
daemon is responsible for managing highly available GEOM providers.
.Pp
.Nm
allows data to be stored transparently on two physically separated machines
connected over the TCP/IP network.
Only one machine (cluster node) can actively use storage provided by
.Nm .
This machine is called primary.
The
.Nm
daemon operates on block level, which makes it transparent for file
systems and applications.
.Pp
There is one main
.Nm
daemon which starts new worker process as soon as a role for the given
resource is changed to primary or as soon as a role for the given
resource is changed to secondary and remote (primary) node will
successfully connect to it.
Every worker process gets a new process title (see
.Xr setproctitle 3 ) ,
which describes its role and resource it controls.
The exact format is:
.Bd -literal -offset indent
hastd: <resource name> (<role>)
.Ed
.Pp
When (and only when)
.Nm
operates in primary role for the given resource, corresponding
.Pa /dev/hast/<name>
disk-like device (GEOM provider) is created.
File systems and applications can use this provider to send I/O
requests to.
Every write, delete and flush operation
.Dv ( BIO_WRITE , BIO_DELETE , BIO_FLUSH )
is sent to the local component and synchronously replicated
to the remote (secondary) node if it is available.
Read operations
.Dv ( BIO_READ )
are handled locally unless I/O error occurs or local version of the data
is not up-to-date yet (synchronization is in progress).
.Pp
The
.Nm
daemon uses the GEOM Gate class to receive I/O requests from the
in-kernel GEOM infrastructure.
The
.Nm geom_gate.ko
module is loaded automatically if the kernel was not compiled with the
following option:
.Bd -ragged -offset indent
.Cd "options GEOM_GATE"
.Ed
.Pp
The connection between two
.Nm
daemons is always initiated from the one running as primary to the one
running as secondary.
When primary
.Nm
is unable to connect or connection fails, it will try to re-establish
connection every few seconds.
Once connection is established, primary
.Nm
will synchronize every extent that was modified during connection outage
to the secondary
.Nm .
.Pp
It is possible that in case of connection outage between the nodes
.Nm
primary role for the given resource will be configured on both nodes.
This in turn leads to incompatible data modifications.
Such condition is called split-brain and cannot be automatically
resolved by the
.Nm
daemon as this will most likely lead to data corruption or loss of
important changes.
Even though it cannot be fixed by
.Nm
itself, it will be detected and further connection between independently
modified nodes will not be possible.
Once this situation is manually resolved by an administrator, resource
on one of the nodes can be initialized (erasing local data), which makes
connection to the remote node possible again.
Connection of freshly initialized component will trigger full resource
synchronization.
.Pp
The
.Nm
daemon itself never picks its role up automatically.
The role has to be configured with the
.Xr hastctl 8
control utility by additional software like
.Nm ucarp
or
.Nm heartbeat
that can reliably manage role separation and switch secondary node to
primary role in case of original primary failure.
.Pp
The
.Nm
daemon can be started with the following command line arguments:
.Bl -tag -width ".Fl P Ar pidfile"
.It Fl c Ar config
Specify alternative location of the configuration file.
The default location is
.Pa /etc/hast.conf .
.It Fl d
Print or log debugging information.
This option can be specified multiple times to raise the verbosity
level.
.It Fl F
Start the
.Nm
daemon in the foreground.
By default
.Nm
starts in the background.
.It Fl h
Print the
.Nm
usage message.
.It Fl P Ar pidfile
Specify alternative location of a file where main process PID will be
stored.
The default location is
.Pa /var/run/hastd.pid .
.El
.Sh EXIT STATUS
Exit status is 0 on success, or one of the values described in
.Xr sysexits 3
on failure.
.Sh EXAMPLES
Launch
.Nm
on both nodes.
Set role for resource
.Nm shared
to primary on
.Nm nodeA
and to secondary on
.Nm nodeB .
Create file system on
.Pa /dev/hast/shared
provider and mount it.
.Bd -literal -offset indent
nodeB# hastd
nodeB# hastctl role secondary shared
nodeA# hastd
nodeA# hastctl role primary shared
nodeA# newfs -U /dev/hast/shared
nodeA# mount -o noatime /dev/hast/shared /shared
.Ed
.Sh FILES
.Bl -tag -width ".Pa /var/run/hastctl" -compact
.It Pa /etc/hast.conf
The configuration file for
.Nm
and
.Xr hastctl 8 .
.It Pa /var/run/hastctl
Control socket used by the
.Xr hastctl 8
control utility to communicate with
.Nm .
.It Pa /var/run/hastd.pid
The default location of the
.Nm
PID file.
.El
.Sh SEE ALSO
.Xr sysexits 3 ,
.Xr geom 4 ,
.Xr hast.conf 5 ,
.Xr ggatec 8 ,
.Xr ggated 8 ,
.Xr ggatel 8 ,
.Xr hastctl 8 ,
.Xr mount 8 ,
.Xr newfs 8 ,
.Xr g_bio 9 .
.Sh AUTHORS
The
.Nm
was developed by
.An Pawel Jakub Dawidek Aq pjd@FreeBSD.org
under sponsorship of the FreeBSD Foundation.

522
sbin/hastd/hastd.c Normal file
View File

@ -0,0 +1,522 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/linker.h>
#include <sys/module.h>
#include <sys/wait.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <libutil.h>
#include <signal.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <activemap.h>
#include <pjdlog.h>
#include "control.h"
#include "hast.h"
#include "hast_proto.h"
#include "hastd.h"
#include "subr.h"
/* Path to configuration file. */
static const char *cfgpath = HAST_CONFIG;
/* Hastd configuration. */
static struct hastd_config *cfg;
/* Was SIGCHLD signal received? */
static bool sigchld_received = false;
/* Was SIGHUP signal received? */
static bool sighup_received = false;
/* Was SIGINT or SIGTERM signal received? */
bool sigexit_received = false;
/* PID file handle. */
struct pidfh *pfh;
/*
 * Report the accepted command-line options through errx(3) and terminate
 * with the EX_USAGE exit code.
 */
static void
usage(void)
{
	static const char synopsis[] = "[-dFh] [-c config] [-P pidfile]";

	errx(EX_USAGE, "%s", synopsis);
}
/*
 * Signal handler: only records which signal arrived by raising the matching
 * flag; the flags are examined and cleared in main_loop().
 * Any signal other than SIGCHLD and SIGHUP trips an assertion.
 */
static void
sighandler(int sig)
{

	if (sig == SIGCHLD) {
		sigchld_received = true;
	} else if (sig == SIGHUP) {
		sighup_received = true;
	} else {
		assert(!"invalid condition");
	}
}
/*
 * Make sure the g_gate module is available: if modfind(2) does not see it,
 * try to load geom_gate with kldload(2) and look it up again.
 * A failure is fatal (EX_OSERR) unless errno is EEXIST.
 */
static void
g_gate_load(void)
{

	/* Already present in the kernel - nothing to do. */
	if (modfind("g_gate") != -1)
		return;
	/* Not present in kernel, try loading it. */
	if (kldload("geom_gate") != -1 && modfind("g_gate") != -1)
		return;
	if (errno == EEXIST)
		return;
	pjdlog_exit(EX_OSERR, "Unable to load geom_gate module");
}
/*
 * Reap every child that has terminated (non-blocking wait3(2) loop).
 *
 * For each reaped PID the resource whose hr_workerpid matches is looked up,
 * the way the worker terminated is logged, and hr_workerpid is reset to 0.
 * If the resource is in primary role a new worker is started through
 * hastd_primary() after a one second pause.
 */
static void
child_exit(void)
{
	struct hast_resource *res;
	int status;
	pid_t pid;

	while ((pid = wait3(&status, WNOHANG, NULL)) > 0) {
		/* Find resource related to the process that just exited. */
		TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
			if (pid == res->hr_workerpid)
				break;
		}
		if (res == NULL) {
			/*
			 * This can happen when new connection arrives and we
			 * cancel child responsible for the old one.
			 */
			continue;
		}
		pjdlog_prefix_set("[%s] (%s) ", res->hr_name,
		    role2str(res->hr_role));
		/*
		 * WEXITSTATUS() is meaningful only when WIFEXITED() is true;
		 * without this check a worker killed by a signal would be
		 * reported as a graceful exit.
		 */
		if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
			pjdlog_debug(1,
			    "Worker process exited gracefully (pid=%u).",
			    (unsigned int)pid);
		} else if (WIFSIGNALED(status)) {
			pjdlog_error("Worker process killed (pid=%u, signal=%d).",
			    (unsigned int)pid, WTERMSIG(status));
		} else {
			pjdlog_error("Worker process failed (pid=%u, status=%d).",
			    (unsigned int)pid, WEXITSTATUS(status));
		}
		res->hr_workerpid = 0;
		if (res->hr_role == HAST_ROLE_PRIMARY) {
			sleep(1);
			pjdlog_info("Restarting worker process.");
			hastd_primary(res);
		}
		pjdlog_prefix_set("%s", "");
	}
}
/*
 * Called from main_loop() after SIGHUP was received.
 * Reloading is not supported yet, so only a warning is logged.
 */
static void
hastd_reload(void)
{

	/* TODO: implement configuration reload. */
	pjdlog_warning("Configuration reload is not implemented.");
}
/*
 * Accept one connection on the listen socket and run the handshake for it.
 *
 * The connection is closed without a reply when the peer address matches no
 * configured resource or when the received header is unusable.  Once the
 * header has been parsed, failures are reported back to the peer in an
 * "errmsg" field before the connection is closed.
 *
 * A header without "token" is an initial connection: any running worker or
 * half-open connection for the resource is cancelled, a fresh random token
 * is generated and sent back, and the connection is kept as hr_remotein.
 * A header with a token matching hr_token is kept as hr_remoteout and the
 * resource is handed to hastd_secondary().
 */
static void
listen_accept(void)
{
	struct hast_resource *res;
	struct proto_conn *conn;
	struct nv *nvin, *nvout, *nverr;
	const char *resname;
	const unsigned char *token;
	char laddr[256], raddr[256];
	size_t size;
	pid_t pid;
	int status;

	proto_local_address(cfg->hc_listenconn, laddr, sizeof(laddr));
	pjdlog_debug(1, "Accepting connection to %s.", laddr);

	if (proto_accept(cfg->hc_listenconn, &conn) < 0) {
		pjdlog_errno(LOG_ERR, "Unable to accept connection %s", laddr);
		return;
	}

	proto_local_address(conn, laddr, sizeof(laddr));
	proto_remote_address(conn, raddr, sizeof(raddr));
	/* The remote end is the source, the local end is the destination. */
	pjdlog_info("Connection from %s to %s.", raddr, laddr);

	nvin = nvout = nverr = NULL;

	/*
	 * Before receiving any data see if remote host have access to any
	 * resource.
	 */
	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
		if (proto_address_match(conn, res->hr_remoteaddr))
			break;
	}
	if (res == NULL) {
		pjdlog_error("Client %s isn't known.", raddr);
		goto close;
	}
	/* Ok, remote host can access at least one resource. */

	if (hast_proto_recv_hdr(conn, &nvin) < 0) {
		pjdlog_errno(LOG_ERR, "Unable to receive header from %s",
		    raddr);
		goto close;
	}

	resname = nv_get_string(nvin, "resource");
	if (resname == NULL) {
		pjdlog_error("No 'resource' field in the header received from %s.",
		    raddr);
		goto close;
	}
	pjdlog_debug(2, "%s: resource=%s", raddr, resname);
	token = nv_get_uint8_array(nvin, &size, "token");
	/*
	 * NULL token means that this is first connection.
	 */
	if (token != NULL && size != sizeof(res->hr_token)) {
		pjdlog_error("Received token of invalid size from %s (expected %zu, got %zu).",
		    raddr, sizeof(res->hr_token), size);
		goto close;
	}

	/*
	 * From now on we want to send errors to the remote node.
	 */
	nverr = nv_alloc();

	/* Find resource related to this connection. */
	TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
		if (strcmp(resname, res->hr_name) == 0)
			break;
	}
	/* Have we found the resource? */
	if (res == NULL) {
		pjdlog_error("No resource '%s' as requested by %s.",
		    resname, raddr);
		nv_add_stringf(nverr, "errmsg", "Resource not configured.");
		goto fail;
	}

	/* Now that we know resource name setup log prefix. */
	pjdlog_prefix_set("[%s] (%s) ", res->hr_name, role2str(res->hr_role));

	/* Does the remote host have access to this resource? */
	if (!proto_address_match(conn, res->hr_remoteaddr)) {
		pjdlog_error("Client %s has no access to the resource.", raddr);
		nv_add_stringf(nverr, "errmsg", "No access to the resource.");
		goto fail;
	}
	/* Is the resource marked as secondary? */
	if (res->hr_role != HAST_ROLE_SECONDARY) {
		pjdlog_error("We act as %s for the resource and not as %s as requested by %s.",
		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY),
		    raddr);
		nv_add_stringf(nverr, "errmsg",
		    "Remote node acts as %s for the resource and not as %s.",
		    role2str(res->hr_role), role2str(HAST_ROLE_SECONDARY));
		goto fail;
	}
	/* Does token (if exists) match? */
	if (token != NULL && memcmp(token, res->hr_token,
	    sizeof(res->hr_token)) != 0) {
		pjdlog_error("Token received from %s doesn't match.", raddr);
		nv_add_stringf(nverr, "errmsg", "Token doesn't match.");
		goto fail;
	}
	/*
	 * If there is no token, but we have half-open connection
	 * (only remotein) or full connection (worker process is running)
	 * we have to cancel those and accept the new connection.
	 */
	if (token == NULL) {
		assert(res->hr_remoteout == NULL);
		pjdlog_debug(1, "Initial connection from %s.", raddr);
		if (res->hr_workerpid != 0) {
			assert(res->hr_remotein == NULL);
			pjdlog_debug(1,
			    "Worker process exists (pid=%u), stopping it.",
			    (unsigned int)res->hr_workerpid);
			/* Stop child process. */
			if (kill(res->hr_workerpid, SIGINT) < 0) {
				pjdlog_errno(LOG_ERR,
				    "Unable to stop worker process (pid=%u)",
				    (unsigned int)res->hr_workerpid);
				/*
				 * Other than logging the problem we
				 * ignore it - nothing smart to do.
				 */
			}
			/* Wait for it to exit. */
			else if ((pid = waitpid(res->hr_workerpid,
			    &status, 0)) != res->hr_workerpid) {
				pjdlog_errno(LOG_ERR,
				    "Waiting for worker process (pid=%u) failed",
				    (unsigned int)res->hr_workerpid);
				/* See above. */
			} else if (status != 0) {
				pjdlog_error("Worker process (pid=%u) exited ungracefully: status=%d.",
				    (unsigned int)res->hr_workerpid, status);
				/* See above. */
			} else {
				pjdlog_debug(1,
				    "Worker process (pid=%u) exited gracefully.",
				    (unsigned int)res->hr_workerpid);
			}
			res->hr_workerpid = 0;
		} else if (res->hr_remotein != NULL) {
			char oaddr[256];

			/*
			 * Report the peer of the connection being cancelled,
			 * not the peer of the connection just accepted.
			 */
			proto_remote_address(res->hr_remotein, oaddr,
			    sizeof(oaddr));
			pjdlog_debug(1,
			    "Canceling half-open connection from %s on connection from %s.",
			    oaddr, raddr);
			proto_close(res->hr_remotein);
			res->hr_remotein = NULL;
		}
	}

	/*
	 * Checks and cleanups are done.
	 */

	if (token == NULL) {
		arc4random_buf(res->hr_token, sizeof(res->hr_token));
		nvout = nv_alloc();
		nv_add_uint8_array(nvout, res->hr_token,
		    sizeof(res->hr_token), "token");
		if (nv_error(nvout) != 0) {
			pjdlog_common(LOG_ERR, 0, nv_error(nvout),
			    "Unable to prepare return header for %s", raddr);
			nv_add_stringf(nverr, "errmsg",
			    "Remote node was unable to prepare return header: %s.",
			    strerror(nv_error(nvout)));
			goto fail;
		}
		if (hast_proto_send(NULL, conn, nvout, NULL, 0) < 0) {
			int error = errno;

			pjdlog_errno(LOG_ERR, "Unable to send response to %s",
			    raddr);
			nv_add_stringf(nverr, "errmsg",
			    "Remote node was unable to send response: %s.",
			    strerror(error));
			goto fail;
		}
		res->hr_remotein = conn;
		pjdlog_debug(1, "Incoming connection from %s configured.",
		    raddr);
	} else {
		res->hr_remoteout = conn;
		pjdlog_debug(1, "Outgoing connection to %s configured.", raddr);
		hastd_secondary(res, nvin);
	}
	nv_free(nvin);
	nv_free(nvout);
	nv_free(nverr);
	pjdlog_prefix_set("%s", "");
	return;
fail:
	if (nv_error(nverr) != 0) {
		pjdlog_common(LOG_ERR, 0, nv_error(nverr),
		    "Unable to prepare error header for %s", raddr);
		goto close;
	}
	if (hast_proto_send(NULL, conn, nverr, NULL, 0) < 0) {
		pjdlog_errno(LOG_ERR, "Unable to send error to %s", raddr);
		goto close;
	}
close:
	/* nv_free() accepts NULL, so no guards are needed here. */
	nv_free(nvin);
	nv_free(nvout);
	nv_free(nverr);
	proto_close(conn);
	pjdlog_prefix_set("%s", "");
}
/*
 * Daemon event loop; never returns.
 *
 * Each iteration first services the flags set by sighandler(), then waits
 * in select(2) on the control and listen descriptors (both read and write
 * sets) and dispatches to control_handle() / listen_accept().
 * A select(2) failure other than EINTR removes the pidfile and exits.
 */
static void
main_loop(void)
{
	fd_set rfds, wfds;
	int fd, maxfd, ret;

	while (true) {
		if (sigchld_received) {
			sigchld_received = false;
			child_exit();
		}
		if (sighup_received) {
			sighup_received = false;
			hastd_reload();
		}

		maxfd = 0;
		FD_ZERO(&rfds);
		FD_ZERO(&wfds);

		/* Register a connection's descriptor in both sets. */
#define	WATCH_CONN(conn)	do {					\
	fd = proto_descriptor(conn);					\
	if (fd >= 0) {							\
		if (fd > maxfd)						\
			maxfd = fd;					\
		FD_SET(fd, &rfds);					\
		FD_SET(fd, &wfds);					\
	}								\
} while (0)
		WATCH_CONN(cfg->hc_controlconn);
		WATCH_CONN(cfg->hc_listenconn);
#undef	WATCH_CONN

		ret = select(maxfd + 1, &rfds, &wfds, NULL, NULL);
		if (ret == -1 && errno == EINTR)
			continue;
		if (ret == -1) {
			KEEP_ERRNO((void)pidfile_remove(pfh));
			pjdlog_exit(EX_OSERR, "select() failed");
		}

		/* True when select(2) flagged the connection's descriptor. */
#define	CONN_READY(conn)						\
	(FD_ISSET((fd = proto_descriptor(conn)), &rfds) || FD_ISSET(fd, &wfds))
		if (CONN_READY(cfg->hc_controlconn))
			control_handle(cfg);
		if (CONN_READY(cfg->hc_listenconn))
			listen_accept();
#undef	CONN_READY
	}
}
/*
 * hastd entry point.
 *
 * Loads geom_gate, parses the command line (-c config, -d raises the debug
 * level, -F stays in foreground, -P pidfile, -h prints usage), opens the
 * pidfile, parses the configuration, installs the SIGHUP/SIGCHLD handlers
 * and starts listening on the control and remote addresses.  Unless -F was
 * given the process daemonizes, switches logging to syslog and writes its
 * PID.  Finally control is passed to main_loop().
 */
int
main(int argc, char *argv[])
{
	const char *pidfile = HASTD_PIDFILE;
	bool foreground = false;
	int debuglevel = 0;
	pid_t otherpid;
	int ch;

	g_gate_load();

	while ((ch = getopt(argc, argv, "c:dFhP:")) != -1) {
		switch (ch) {
		case 'c':
			cfgpath = optarg;
			break;
		case 'd':
			debuglevel++;
			break;
		case 'F':
			foreground = true;
			break;
		case 'P':
			pidfile = optarg;
			break;
		case 'h':
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	pjdlog_debug_set(debuglevel);

	pfh = pidfile_open(pidfile, 0600, &otherpid);
	if (pfh == NULL) {
		if (errno == EEXIST) {
			pjdlog_exitx(EX_TEMPFAIL,
			    "Another hastd is already running, pid: %jd.",
			    (intmax_t)otherpid);
		}
		/* If we cannot create pidfile from other reasons, only warn. */
		pjdlog_errno(LOG_WARNING, "Cannot open or create pidfile");
	}

	cfg = yy_config_parse(cfgpath);
	assert(cfg != NULL);

	signal(SIGHUP, sighandler);
	signal(SIGCHLD, sighandler);

	/* Listen on control address. */
	if (proto_server(cfg->hc_controladdr, &cfg->hc_controlconn) < 0) {
		KEEP_ERRNO((void)pidfile_remove(pfh));
		pjdlog_exit(EX_OSERR, "Unable to listen on control address %s",
		    cfg->hc_controladdr);
	}
	/* Listen for remote connections. */
	if (proto_server(cfg->hc_listenaddr, &cfg->hc_listenconn) < 0) {
		KEEP_ERRNO((void)pidfile_remove(pfh));
		pjdlog_exit(EX_OSERR, "Unable to listen on address %s",
		    cfg->hc_listenaddr);
	}

	if (!foreground) {
		if (daemon(0, 0) < 0) {
			KEEP_ERRNO((void)pidfile_remove(pfh));
			pjdlog_exit(EX_OSERR, "Unable to daemonize");
		}

		/* Start logging to syslog. */
		pjdlog_mode_set(PJDLOG_MODE_SYSLOG);

		/* Write PID to a file. */
		if (pidfile_write(pfh) < 0) {
			pjdlog_errno(LOG_WARNING,
			    "Unable to write PID to a file");
		}
	}

	main_loop();

	exit(0);
}

48
sbin/hastd/hastd.h Normal file
View File

@ -0,0 +1,48 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _HASTD_H_
#define _HASTD_H_
#include <sys/param.h>
#include <libutil.h>
#include <nv.h>
#include "hast.h"
extern bool sigexit_received;
extern struct pidfh *pfh;
void hastd_primary(struct hast_resource *res);
void hastd_secondary(struct hast_resource *res, struct nv *nvin);
#endif /* !_HASTD_H_ */

148
sbin/hastd/hooks.c Normal file
View File

@ -0,0 +1,148 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/wait.h>
#include <assert.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <syslog.h>
#include <libgen.h>
#include <paths.h>
#include <pjdlog.h>
#include "hooks.h"
/*
 * Prepare the descriptor table of a freshly forked hook process: close
 * descriptors 0 through sysconf(_SC_OPEN_MAX) (1024 is used as the limit if
 * sysconf fails) and then attach /dev/null to stdin, stdout and stderr.
 * Failures are only logged as warnings.
 */
static void
descriptors(void)
{
	long limit;
	int fd, nullfd;

	/*
	 * Close all descriptors.
	 */
	limit = sysconf(_SC_OPEN_MAX);
	if (limit < 0) {
		pjdlog_errno(LOG_WARNING, "sysconf(_SC_OPEN_MAX) failed");
		limit = 1024;
	}
	fd = 0;
	while (fd <= limit) {
		close(fd);
		fd++;
	}

	/*
	 * Redirect stdin to /dev/null.
	 */
	nullfd = open(_PATH_DEVNULL, O_RDONLY);
	if (nullfd < 0) {
		pjdlog_errno(LOG_WARNING, "Unable to open %s for reading",
		    _PATH_DEVNULL);
	} else if (nullfd != STDIN_FILENO) {
		if (dup2(nullfd, STDIN_FILENO) < 0) {
			pjdlog_errno(LOG_WARNING,
			    "Unable to duplicate descriptor for stdin");
		}
		close(nullfd);
	}

	/*
	 * Redirect stdout and stderr to /dev/null.
	 */
	nullfd = open(_PATH_DEVNULL, O_WRONLY);
	if (nullfd < 0) {
		pjdlog_errno(LOG_WARNING, "Unable to open %s for writing",
		    _PATH_DEVNULL);
		return;
	}
	if (nullfd != STDOUT_FILENO && dup2(nullfd, STDOUT_FILENO) < 0) {
		pjdlog_errno(LOG_WARNING,
		    "Unable to duplicate descriptor for stdout");
	}
	if (nullfd != STDERR_FILENO && dup2(nullfd, STDERR_FILENO) < 0) {
		pjdlog_errno(LOG_WARNING,
		    "Unable to duplicate descriptor for stderr");
	}
	/* Keep the descriptor only if it already is stdout or stderr. */
	if (nullfd != STDOUT_FILENO && nullfd != STDERR_FILENO)
		close(nullfd);
}
/*
 * Variadic front-end for hook_execv(): collects the arguments following
 * 'path' into a va_list, forwards them and returns hook_execv()'s result.
 */
int
hook_exec(const char *path, ...)
{
	va_list args;
	int rc;

	va_start(args, path);
	rc = hook_execv(path, args);
	va_end(args);

	return (rc);
}
/*
 * Run the hook program 'path' and wait for it to finish.
 *
 * The NULL-terminated list of 'char *' arguments in 'ap' becomes argv[1..]
 * (at most 62 arguments); argv[0] is the base name of 'path'.
 * The child closes all descriptors and redirects stdio to /dev/null (see
 * descriptors()) before calling execv(2).
 *
 * Returns 0 when 'path' is NULL or empty (no hook configured), the exit
 * status of the hook when it exited normally, and -1 when fork(2) or
 * waitpid(2) failed or the hook did not terminate through exit (for example
 * it was killed by a signal).
 */
int
hook_execv(const char *path, va_list ap)
{
	char *args[64];
	unsigned int ii;
	pid_t pid, wpid;
	int status;

	if (path == NULL || path[0] == '\0')
		return (0);

	memset(args, 0, sizeof(args));
	args[0] = basename(path);
	for (ii = 1; ii < sizeof(args) / sizeof(args[0]); ii++) {
		args[ii] = va_arg(ap, char *);
		if (args[ii] == NULL)
			break;
	}
	/* The last slot must stay NULL to terminate argv. */
	assert(ii < sizeof(args) / sizeof(args[0]));

	pid = fork();
	switch (pid) {
	case -1:	/* Error. */
		pjdlog_errno(LOG_ERR, "Unable to fork %s", path);
		return (-1);
	case 0:		/* Child. */
		descriptors();
		execv(path, args);
		pjdlog_errno(LOG_ERR, "Unable to execute %s", path);
		exit(EX_SOFTWARE);
	default:	/* Parent. */
		break;
	}

	wpid = waitpid(pid, &status, 0);
	if (wpid != pid) {
		/* 'status' is not valid here, so it must not be decoded. */
		pjdlog_errno(LOG_ERR, "Unable to wait for %s", path);
		return (-1);
	}
	/*
	 * WEXITSTATUS() is only defined for a normal exit; a hook killed by
	 * a signal must not be reported as success.
	 */
	if (!WIFEXITED(status)) {
		pjdlog_error("Hook %s terminated abnormally (status=%d).",
		    path, status);
		return (-1);
	}
	return (WEXITSTATUS(status));
}

40
sbin/hastd/hooks.h Normal file
View File

@ -0,0 +1,40 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _HOOKS_H_
#define _HOOKS_H_
#include <stdarg.h>
int hook_exec(const char *path, ...);
int hook_execv(const char *path, va_list ap);
#endif /* !_HOOKS_H_ */

222
sbin/hastd/metadata.c Normal file
View File

@ -0,0 +1,222 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <ebuf.h>
#include <nv.h>
#include <pjdlog.h>
#include <subr.h>
#include "metadata.h"
/*
 * Read the on-disk metadata of the resource's local provider into 'res'.
 *
 * If the provider is not open yet (hr_localfd == -1) it is opened through
 * provinfo(); with 'openrw' set an exclusive non-blocking flock(2) is also
 * taken (EOPNOTSUPP from flock is tolerated with a warning).
 * METADATA_SIZE bytes are read from offset 0, decoded with nv_ntoh() and
 * the fields are copied into 'res'.
 *
 * Returns 0 on success.  On failure returns -1 with errno set to the saved
 * error code and, if the provider was opened by this call, closes it again.
 * All temporary buffers are released on every path.
 */
int
metadata_read(struct hast_resource *res, bool openrw)
{
	unsigned char *buf;
	struct ebuf *eb;
	struct nv *nv;
	ssize_t done;
	const char *str;
	int rerrno;
	bool opened_here;

	opened_here = false;
	rerrno = 0;
	eb = NULL;
	nv = NULL;

	/*
	 * Is this first metadata_read() call for this resource?
	 */
	if (res->hr_localfd == -1) {
		if (provinfo(res, openrw) < 0) {
			rerrno = errno;
			goto fail;
		}
		opened_here = true;
		pjdlog_debug(1, "Obtained info about %s.", res->hr_localpath);
		if (openrw) {
			if (flock(res->hr_localfd, LOCK_EX | LOCK_NB) < 0) {
				rerrno = errno;
				if (errno == EOPNOTSUPP) {
					pjdlog_warning("Unable to lock %s (operation not supported), but continuing.",
					    res->hr_localpath);
				} else {
					pjdlog_errno(LOG_ERR,
					    "Unable to lock %s",
					    res->hr_localpath);
					goto fail;
				}
			}
			pjdlog_debug(1, "Locked %s.", res->hr_localpath);
		}
	}

	eb = ebuf_alloc(METADATA_SIZE);
	if (eb == NULL) {
		rerrno = errno;
		pjdlog_errno(LOG_ERR,
		    "Unable to allocate memory to read metadata");
		goto fail;
	}
	if (ebuf_add_tail(eb, NULL, METADATA_SIZE) < 0) {
		rerrno = errno;
		pjdlog_errno(LOG_ERR,
		    "Unable to allocate memory to read metadata");
		goto fail;
	}
	buf = ebuf_data(eb, NULL);
	assert(buf != NULL);
	done = pread(res->hr_localfd, buf, METADATA_SIZE, 0);
	if (done < 0 || done != METADATA_SIZE) {
		rerrno = errno;
		pjdlog_errno(LOG_ERR, "Unable to read metadata");
		goto fail;
	}
	/* On success the nv takes over 'eb'; nv_free() releases both. */
	nv = nv_ntoh(eb);
	if (nv == NULL) {
		rerrno = errno;
		pjdlog_errno(LOG_ERR, "Metadata read from %s is invalid",
		    res->hr_localpath);
		goto fail;
	}

	/* A missing "resource" entry is treated like a name mismatch. */
	str = nv_get_string(nv, "resource");
	if (str == NULL || strcmp(str, res->hr_name) != 0) {
		pjdlog_error("Provider %s is not part of resource %s.",
		    res->hr_localpath, res->hr_name);
		goto fail;
	}

	res->hr_datasize = nv_get_uint64(nv, "datasize");
	res->hr_extentsize = (int)nv_get_uint32(nv, "extentsize");
	res->hr_keepdirty = (int)nv_get_uint32(nv, "keepdirty");
	res->hr_localoff = nv_get_uint64(nv, "offset");
	res->hr_resuid = nv_get_uint64(nv, "resuid");
	if (res->hr_role != HAST_ROLE_PRIMARY) {
		/* Secondary or init role. */
		res->hr_secondary_localcnt = nv_get_uint64(nv, "localcnt");
		res->hr_secondary_remotecnt = nv_get_uint64(nv, "remotecnt");
	}
	if (res->hr_role != HAST_ROLE_SECONDARY) {
		/* Primary or init role. */
		res->hr_primary_localcnt = nv_get_uint64(nv, "localcnt");
		res->hr_primary_remotecnt = nv_get_uint64(nv, "remotecnt");
	}
	str = nv_get_string(nv, "prevrole");
	if (str != NULL) {
		if (strcmp(str, "primary") == 0)
			res->hr_previous_role = HAST_ROLE_PRIMARY;
		else if (strcmp(str, "secondary") == 0)
			res->hr_previous_role = HAST_ROLE_SECONDARY;
	}

	if (nv_error(nv) != 0) {
		errno = rerrno = nv_error(nv);
		pjdlog_errno(LOG_ERR, "Unable to read metadata from %s",
		    res->hr_localpath);
		goto fail;
	}
	/* Everything needed was copied into 'res'; drop the decoded list. */
	nv_free(nv);
	return (0);
fail:
	if (nv != NULL)
		nv_free(nv);
	else if (eb != NULL)
		ebuf_free(eb);
	if (opened_here) {
		close(res->hr_localfd);
		res->hr_localfd = -1;
	}
	errno = rerrno;
	return (-1);
}
/*
 * Serialize the metadata of 'res' and write it as one METADATA_SIZE block
 * at offset 0 of the local provider (hr_localfd).
 *
 * The local/remote counters stored are the primary ones for primary and
 * init roles and the secondary ones for secondary role; the current role is
 * recorded as "prevrole" and copied to hr_previous_role.
 *
 * Returns 0 on success and -1 on failure.  The temporary buffer and the nv
 * list are released on every path.
 */
int
metadata_write(struct hast_resource *res)
{
	struct ebuf *eb;
	struct nv *nv;
	unsigned char *buf, *ptr;
	size_t size;
	ssize_t done;
	int ret;

	/* Zero-filled so the bytes after the encoded data are zeros. */
	buf = calloc(1, METADATA_SIZE);
	if (buf == NULL) {
		pjdlog_error("Unable to allocate %zu bytes for metadata.",
		    (size_t)METADATA_SIZE);
		return (-1);
	}

	ret = -1;

	nv = nv_alloc();
	nv_add_string(nv, res->hr_name, "resource");
	nv_add_uint64(nv, (uint64_t)res->hr_datasize, "datasize");
	nv_add_uint32(nv, (uint32_t)res->hr_extentsize, "extentsize");
	nv_add_uint32(nv, (uint32_t)res->hr_keepdirty, "keepdirty");
	nv_add_uint64(nv, (uint64_t)res->hr_localoff, "offset");
	nv_add_uint64(nv, res->hr_resuid, "resuid");
	if (res->hr_role == HAST_ROLE_PRIMARY ||
	    res->hr_role == HAST_ROLE_INIT) {
		nv_add_uint64(nv, res->hr_primary_localcnt, "localcnt");
		nv_add_uint64(nv, res->hr_primary_remotecnt, "remotecnt");
	} else /* if (res->hr_role == HAST_ROLE_SECONDARY) */ {
		assert(res->hr_role == HAST_ROLE_SECONDARY);
		nv_add_uint64(nv, res->hr_secondary_localcnt, "localcnt");
		nv_add_uint64(nv, res->hr_secondary_remotecnt, "remotecnt");
	}
	nv_add_string(nv, role2str(res->hr_role), "prevrole");
	if (nv_error(nv) != 0) {
		pjdlog_error("Unable to create metadata.");
		goto end;
	}
	res->hr_previous_role = res->hr_role;
	/* The returned ebuf still belongs to 'nv'; nv_free() releases it. */
	eb = nv_hton(nv);
	assert(eb != NULL);
	ptr = ebuf_data(eb, &size);
	assert(ptr != NULL);
	assert(size < METADATA_SIZE);
	bcopy(ptr, buf, size);
	done = pwrite(res->hr_localfd, buf, METADATA_SIZE, 0);
	if (done < 0 || done != METADATA_SIZE) {
		pjdlog_errno(LOG_ERR, "Unable to write metadata");
		goto end;
	}
	ret = 0;
end:
	free(buf);
	nv_free(nv);
	return (ret);
}

48
sbin/hastd/metadata.h Normal file
View File

@ -0,0 +1,48 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _METADATA_H_
#define _METADATA_H_
#include <stdbool.h>
#include <hast.h>
/*
* Maximum size of metadata.
* XXX: We should take sector size into account.
*/
#define METADATA_SIZE 4096
int metadata_read(struct hast_resource *res, bool openrw);
int metadata_write(struct hast_resource *res);
#endif /* !_METADATA_H_ */

882
sbin/hastd/nv.c Normal file
View File

@ -0,0 +1,882 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/endian.h>
#include <assert.h>
#include <bitstring.h>
#include <errno.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ebuf.h>
#include <nv.h>
#define NV_MAGIC 0xaea1e
/*
 * Handle for a list of name/value entries.
 */
struct nv {
/* Set to NV_MAGIC while the structure is valid; cleared just before free. */
int nv_magic;
/* Error code recorded for this list; 0 means no error. */
int nv_error;
/* Buffer holding the entries, each one starting with a struct nvhdr. */
struct ebuf *nv_ebuf;
};
/*
 * Header found in front of every entry stored in the nv buffer.
 * It is followed by the name and then by the data (see NVH_DATA()).
 */
struct nvhdr {
/* Value type (NV_TYPE_MASK bits) together with byte-order bits (NV_ORDER_MASK). */
uint8_t nvh_type;
/* Size of the name in bytes, including the terminating '\0'. */
uint8_t nvh_namesize;
/* Size of the data in bytes; use NVH_DSIZE() to read it in host order. */
uint32_t nvh_dsize;
/* The name itself; NVH_HSIZE() rounds its size up to a multiple of 8. */
char nvh_name[0];
} __packed;
/*
 * Accessors for an entry header; 'nvh' is a pointer to struct nvhdr.
 *
 * NVH_DATA  - address of the entry data (header start + NVH_HSIZE).
 * NVH_HSIZE - size of the header: struct nvhdr plus the name size rounded
 *             up to a multiple of 8.
 * NVH_DSIZE - data size in host byte order; the stored value is taken as is
 *             when the order bits say NV_ORDER_HOST and passed through
 *             le32toh() otherwise.
 * NVH_SIZE  - size of the whole entry: header plus the data size rounded up
 *             to a multiple of 8.
 *
 * The argument is parenthesized everywhere so that expressions such as
 * NVH_DATA(p + 1) apply the cast to the whole expression.
 */
#define	NVH_DATA(nvh)	((unsigned char *)(nvh) + NVH_HSIZE(nvh))
#define	NVH_HSIZE(nvh)	\
	(sizeof(struct nvhdr) + roundup2((nvh)->nvh_namesize, 8))
#define	NVH_DSIZE(nvh)	\
	(((nvh)->nvh_type & NV_ORDER_MASK) == NV_ORDER_HOST ?		\
	(nvh)->nvh_dsize :						\
	le32toh((nvh)->nvh_dsize))
#define	NVH_SIZE(nvh)	(NVH_HSIZE(nvh) + roundup2(NVH_DSIZE(nvh), 8))
/*
 * Sanity check used at the top of the nv functions: asserts that the
 * pointer is not NULL and that the structure carries NV_MAGIC.
 */
#define NV_CHECK(nv) do { \
assert((nv) != NULL); \
assert((nv)->nv_magic == NV_MAGIC); \
} while (0)
static void nv_add(struct nv *nv, const unsigned char *value, size_t vsize,
int type, const char *name);
static void nv_addv(struct nv *nv, const unsigned char *value, size_t vsize,
int type, const char *namefmt, va_list nameap);
static struct nvhdr *nv_find(struct nv *nv, int type, const char *namefmt,
va_list nameap);
static void nv_swap(struct nvhdr *nvh, bool tohost);
/*
* Allocate and initialize new nv structure.
* Return NULL in case of malloc(3) failure.
*/
struct nv *
nv_alloc(void)
{
	struct nv *list;
	struct ebuf *storage;

	list = malloc(sizeof(*list));
	if (list == NULL)
		return (NULL);
	/* Start with an empty buffer; entries are appended later. */
	storage = ebuf_alloc(0);
	if (storage == NULL) {
		free(list);
		return (NULL);
	}
	list->nv_ebuf = storage;
	list->nv_error = 0;
	list->nv_magic = NV_MAGIC;

	return (list);
}
/*
* Free the given nv structure.
*/
void
nv_free(struct nv *nv)
{

	/* Freeing NULL is allowed and does nothing. */
	if (nv != NULL) {
		NV_CHECK(nv);

		/* Invalidate the magic so a stale pointer fails NV_CHECK(). */
		nv->nv_magic = 0;
		ebuf_free(nv->nv_ebuf);
		free(nv);
	}
}
/*
* Return error for the given nv structure.
*/
int
nv_error(const struct nv *nv)
{

	if (nv != NULL) {
		NV_CHECK(nv);
		return (nv->nv_error);
	}
	/* A NULL list is reported as an allocation failure. */
	return (ENOMEM);
}
/*
* Set error for the given nv structure and return previous error.
*/
int
nv_set_error(struct nv *nv, int error)
{
	int old;

	/* A NULL list is reported as an allocation failure. */
	if (nv == NULL)
		return (ENOMEM);

	NV_CHECK(nv);

	old = nv->nv_error;
	nv->nv_error = error;

	return (old);
}
/*
* Validate correctness of the entire nv structure and all its elements.
* If extrap is not NULL, store number of extra bytes at the end of the buffer.
*/
/*
 * Walk the buffer of 'nv' entry by entry and verify that every header, name
 * and data size is consistent and stays inside the buffer.  The walk stops
 * at the first entry whose first byte is zero or at the end of the buffer.
 *
 * Returns 0 on success and stores in *extrap (if not NULL) the number of
 * bytes left in the buffer at the point where the walk stopped.
 * Returns -1 with errno set on failure: ENOMEM for a NULL 'nv', EINVAL for
 * a malformed entry (EINVAL is also recorded in the nv if it had no error).
 */
int
nv_validate(struct nv *nv, size_t *extrap)
{
	struct nvhdr *nvh;
	unsigned char *data, *ptr;
	size_t dsize, size, vsize;
	int error;

	if (nv == NULL) {
		errno = ENOMEM;
		return (-1);
	}

	NV_CHECK(nv);
	assert(nv->nv_error == 0);

	/* TODO: Check that names are unique? */

	error = 0;
	ptr = ebuf_data(nv->nv_ebuf, &size);
	while (size > 0) {
		/*
		 * Zeros at the end of the buffer are acceptable.
		 */
		if (ptr[0] == '\0')
			break;
		/*
		 * Minimum size at this point is size of nvhdr structure, one
		 * character long name plus terminating '\0'.
		 */
		if (size < sizeof(*nvh) + 2) {
			error = EINVAL;
			break;
		}
		nvh = (struct nvhdr *)ptr;
		/*
		 * A zero name size would make the checks below index
		 * nvh_name[-1] and run strlen() on unterminated data.
		 */
		if (nvh->nvh_namesize == 0) {
			error = EINVAL;
			break;
		}
		if (size < NVH_HSIZE(nvh)) {
			error = EINVAL;
			break;
		}
		if (nvh->nvh_name[nvh->nvh_namesize - 1] != '\0') {
			error = EINVAL;
			break;
		}
		if (strlen(nvh->nvh_name) !=
		    (size_t)(nvh->nvh_namesize - 1)) {
			error = EINVAL;
			break;
		}
		if ((nvh->nvh_type & NV_TYPE_MASK) < NV_TYPE_FIRST ||
		    (nvh->nvh_type & NV_TYPE_MASK) > NV_TYPE_LAST) {
			error = EINVAL;
			break;
		}
		dsize = NVH_DSIZE(nvh);
		if (dsize == 0) {
			error = EINVAL;
			break;
		}
		/*
		 * Compare the raw data size with the space left after the
		 * header before relying on NVH_SIZE(): the size field is
		 * 32 bits wide and rounding a value close to its maximum up
		 * to a multiple of 8 can wrap around, which would let an
		 * oversized entry pass the check below.
		 */
		if (dsize > size - NVH_HSIZE(nvh)) {
			error = EINVAL;
			break;
		}
		if (size < NVH_SIZE(nvh)) {
			error = EINVAL;
			break;
		}
		vsize = 0;
		switch (nvh->nvh_type & NV_TYPE_MASK) {
		case NV_TYPE_INT8:
		case NV_TYPE_UINT8:
			if (vsize == 0)
				vsize = 1;
			/* FALLTHROUGH */
		case NV_TYPE_INT16:
		case NV_TYPE_UINT16:
			if (vsize == 0)
				vsize = 2;
			/* FALLTHROUGH */
		case NV_TYPE_INT32:
		case NV_TYPE_UINT32:
			if (vsize == 0)
				vsize = 4;
			/* FALLTHROUGH */
		case NV_TYPE_INT64:
		case NV_TYPE_UINT64:
			if (vsize == 0)
				vsize = 8;
			/* Scalars must have exactly the size of their type. */
			if (dsize != vsize) {
				error = EINVAL;
				break;
			}
			break;
		case NV_TYPE_INT8_ARRAY:
		case NV_TYPE_UINT8_ARRAY:
			break;
		case NV_TYPE_INT16_ARRAY:
		case NV_TYPE_UINT16_ARRAY:
			if (vsize == 0)
				vsize = 2;
			/* FALLTHROUGH */
		case NV_TYPE_INT32_ARRAY:
		case NV_TYPE_UINT32_ARRAY:
			if (vsize == 0)
				vsize = 4;
			/* FALLTHROUGH */
		case NV_TYPE_INT64_ARRAY:
		case NV_TYPE_UINT64_ARRAY:
			if (vsize == 0)
				vsize = 8;
			/* Arrays must hold a whole number of elements. */
			if ((dsize % vsize) != 0) {
				error = EINVAL;
				break;
			}
			break;
		case NV_TYPE_STRING:
			/* Exactly one '\0', and it must be the last byte. */
			data = NVH_DATA(nvh);
			if (data[dsize - 1] != '\0') {
				error = EINVAL;
				break;
			}
			if (strlen((char *)data) != dsize - 1) {
				error = EINVAL;
				break;
			}
			break;
		default:
			assert(!"invalid condition");
		}
		if (error != 0)
			break;
		ptr += NVH_SIZE(nvh);
		size -= NVH_SIZE(nvh);
	}
	if (error != 0) {
		errno = error;
		if (nv->nv_error == 0)
			nv->nv_error = error;
		return (-1);
	}
	if (extrap != NULL)
		*extrap = size;
	return (0);
}
/*
 * Walk every entry stored in the given nv structure, convert it to network
 * byte order and hand back the ebuf structure that holds the entries.
 */
struct ebuf *
nv_hton(struct nv *nv)
{
	unsigned char *cursor;
	struct nvhdr *entry;
	size_t left, step;

	NV_CHECK(nv);
	assert(nv->nv_error == 0);

	cursor = ebuf_data(nv->nv_ebuf, &left);
	for (; left > 0; cursor += step, left -= step) {
		/*
		 * The smallest possible entry is the nvhdr structure
		 * followed by a one character long name and its
		 * terminating '\0'.
		 */
		assert(left >= sizeof(*entry) + 2);
		entry = (struct nvhdr *)cursor;
		assert(NVH_SIZE(entry) <= left);
		nv_swap(entry, false);
		/* Advance by the entry size as reported after conversion. */
		step = NVH_SIZE(entry);
	}

	return (nv->nv_ebuf);
}
/*
 * Wrap an ebuf received from the network in a freshly allocated nv
 * structure.
 *
 * Returns the new structure, or NULL if the allocation fails or
 * nv_validate() rejects the buffer; in the latter case errno is left as
 * nv_validate() set it.
 */
struct nv *
nv_ntoh(struct ebuf *eb)
{
	struct nv *result;
	size_t trailing;
	int saved_errno;

	assert(eb != NULL);

	result = malloc(sizeof(*result));
	if (result == NULL)
		return (NULL);
	result->nv_error = 0;
	result->nv_ebuf = eb;
	result->nv_magic = NV_MAGIC;

	if (nv_validate(result, &trailing) >= 0) {
		/* Drop the extra zeros found at the end of the buffer. */
		ebuf_del_tail(eb, trailing);
		return (result);
	}

	/* free(3) must not be allowed to disturb the validation error. */
	saved_errno = errno;
	result->nv_magic = 0;
	free(result);
	errno = saved_errno;
	return (NULL);
}
/*
 * Generate nv_add_<type>() for each fixed-width integer type.
 *
 * Every generated function takes the value by value, formats the entry
 * name from namefmt and the variadic arguments, and passes the address of
 * the local copy together with sizeof(value) to nv_addv(), tagged with
 * NV_TYPE_<TYPE>.  Nothing is returned; failures are reported through the
 * nv error state by the routines it calls.
 */
#define NV_DEFINE_ADD(type, TYPE) \
void \
nv_add_##type(struct nv *nv, type##_t value, const char *namefmt, ...) \
{ \
va_list nameap; \
\
va_start(nameap, namefmt); \
nv_addv(nv, (unsigned char *)&value, sizeof(value), \
NV_TYPE_##TYPE, namefmt, nameap); \
va_end(nameap); \
}
NV_DEFINE_ADD(int8, INT8)
NV_DEFINE_ADD(uint8, UINT8)
NV_DEFINE_ADD(int16, INT16)
NV_DEFINE_ADD(uint16, UINT16)
NV_DEFINE_ADD(int32, INT32)
NV_DEFINE_ADD(uint32, UINT32)
NV_DEFINE_ADD(int64, INT64)
NV_DEFINE_ADD(uint64, UINT64)
#undef NV_DEFINE_ADD
/*
 * Generate nv_add_<type>_array() for each fixed-width integer type.
 *
 * nsize is the number of elements in value; the byte length handed to
 * nv_addv() is sizeof(value[0]) * nsize.  The multiplication is not
 * checked for overflow here.  The entry is tagged NV_TYPE_<TYPE>_ARRAY and
 * named by formatting namefmt with the variadic arguments.
 */
#define NV_DEFINE_ADD_ARRAY(type, TYPE) \
void \
nv_add_##type##_array(struct nv *nv, const type##_t *value, \
size_t nsize, const char *namefmt, ...) \
{ \
va_list nameap; \
\
va_start(nameap, namefmt); \
nv_addv(nv, (const unsigned char *)value, \
sizeof(value[0]) * nsize, NV_TYPE_##TYPE##_ARRAY, namefmt, \
nameap); \
va_end(nameap); \
}
NV_DEFINE_ADD_ARRAY(int8, INT8)
NV_DEFINE_ADD_ARRAY(uint8, UINT8)
NV_DEFINE_ADD_ARRAY(int16, INT16)
NV_DEFINE_ADD_ARRAY(uint16, UINT16)
NV_DEFINE_ADD_ARRAY(int32, INT32)
NV_DEFINE_ADD_ARRAY(uint32, UINT32)
NV_DEFINE_ADD_ARRAY(int64, INT64)
NV_DEFINE_ADD_ARRAY(uint64, UINT64)
#undef NV_DEFINE_ADD_ARRAY
/*
 * Add a NUL-terminated string under a printf-style formatted name.
 * The terminating '\0' is stored together with the string.
 */
void
nv_add_string(struct nv *nv, const char *value, const char *namefmt, ...)
{
	va_list args;

	va_start(args, namefmt);
	nv_addv(nv, (const unsigned char *)value, strlen(value) + 1,
	    NV_TYPE_STRING, namefmt, args);
	va_end(args);
}
/*
 * Add a string whose value (not its name) is built from a printf-style
 * format; the variadic arguments are forwarded to nv_add_stringv().
 */
void
nv_add_stringf(struct nv *nv, const char *name, const char *valuefmt, ...)
{
	va_list args;

	va_start(args, valuefmt);
	nv_add_stringv(nv, name, valuefmt, args);
	va_end(args);
}
/*
 * Add a string whose value is built from valuefmt/valueap under the given
 * (literal, not formatted) name.
 *
 * If the value cannot be formatted, ENOMEM is recorded in the nv error
 * state unless an earlier error is already stored there.  A NULL nv is
 * tolerated in that path the same way nv_add() tolerates it: only errno is
 * set, instead of dereferencing the NULL pointer.
 */
void
nv_add_stringv(struct nv *nv, const char *name, const char *valuefmt,
    va_list valueap)
{
	char *value;
	ssize_t size;

	size = vasprintf(&value, valuefmt, valueap);
	if (size < 0) {
		if (nv == NULL)
			errno = ENOMEM;
		else if (nv->nv_error == 0)
			nv->nv_error = ENOMEM;
		return;
	}
	/* Account for the terminating '\0', which is stored as well. */
	size++;
	nv_add(nv, (const unsigned char *)value, size, NV_TYPE_STRING, name);
	free(value);
}
/*
 * Generate nv_get_<type>() for each fixed-width integer type.
 *
 * The entry name is formatted from namefmt and the variadic arguments and
 * looked up with nv_find() using type NV_TYPE_<TYPE>.  When nv_find()
 * returns NULL the generated function returns 0, so a stored 0 and a
 * failed lookup look the same from the return value alone.  On success the
 * entry is asserted to be in host byte order and exactly sizeof(value)
 * bytes long, and the value is copied out with bcopy().
 */
#define NV_DEFINE_GET(type, TYPE) \
type##_t \
nv_get_##type(struct nv *nv, const char *namefmt, ...) \
{ \
struct nvhdr *nvh; \
va_list nameap; \
type##_t value; \
\
va_start(nameap, namefmt); \
nvh = nv_find(nv, NV_TYPE_##TYPE, namefmt, nameap); \
va_end(nameap); \
if (nvh == NULL) \
return (0); \
assert((nvh->nvh_type & NV_ORDER_MASK) == NV_ORDER_HOST); \
assert(sizeof(value) == nvh->nvh_dsize); \
bcopy(NVH_DATA(nvh), &value, sizeof(value)); \
\
return (value); \
}
NV_DEFINE_GET(int8, INT8)
NV_DEFINE_GET(uint8, UINT8)
NV_DEFINE_GET(int16, INT16)
NV_DEFINE_GET(uint16, UINT16)
NV_DEFINE_GET(int32, INT32)
NV_DEFINE_GET(uint32, UINT32)
NV_DEFINE_GET(int64, INT64)
NV_DEFINE_GET(uint64, UINT64)
#undef NV_DEFINE_GET
/*
 * Generate nv_get_<type>_array() for each fixed-width integer type.
 *
 * The entry name is formatted from namefmt and the variadic arguments and
 * looked up with nv_find() using type NV_TYPE_<TYPE>_ARRAY.  NULL is
 * returned when nv_find() fails.  On success the returned pointer refers
 * directly to the entry's data (no copy is made) and, if sizep is not
 * NULL, *sizep receives the number of elements (data size divided by the
 * element size).
 */
#define NV_DEFINE_GET_ARRAY(type, TYPE) \
const type##_t * \
nv_get_##type##_array(struct nv *nv, size_t *sizep, \
const char *namefmt, ...) \
{ \
struct nvhdr *nvh; \
va_list nameap; \
\
va_start(nameap, namefmt); \
nvh = nv_find(nv, NV_TYPE_##TYPE##_ARRAY, namefmt, nameap); \
va_end(nameap); \
if (nvh == NULL) \
return (NULL); \
assert((nvh->nvh_type & NV_ORDER_MASK) == NV_ORDER_HOST); \
assert((nvh->nvh_dsize % sizeof(type##_t)) == 0); \
if (sizep != NULL) \
*sizep = nvh->nvh_dsize / sizeof(type##_t); \
return ((type##_t *)(void *)NVH_DATA(nvh)); \
}
NV_DEFINE_GET_ARRAY(int8, INT8)
NV_DEFINE_GET_ARRAY(uint8, UINT8)
NV_DEFINE_GET_ARRAY(int16, INT16)
NV_DEFINE_GET_ARRAY(uint16, UINT16)
NV_DEFINE_GET_ARRAY(int32, INT32)
NV_DEFINE_GET_ARRAY(uint32, UINT32)
NV_DEFINE_GET_ARRAY(int64, INT64)
NV_DEFINE_GET_ARRAY(uint64, UINT64)
#undef NV_DEFINE_GET_ARRAY
/*
 * Look up a string entry by its printf-style formatted name.
 *
 * Returns a pointer to the string stored inside the nv structure (no copy
 * is made), or NULL when nv_find() fails.
 */
const char *
nv_get_string(struct nv *nv, const char *namefmt, ...)
{
	struct nvhdr *entry;
	va_list args;
	char *text;

	va_start(args, namefmt);
	entry = nv_find(nv, NV_TYPE_STRING, namefmt, args);
	va_end(args);
	if (entry == NULL)
		return (NULL);

	/* A found entry is in host order and holds a terminated string. */
	assert((entry->nvh_type & NV_ORDER_MASK) == NV_ORDER_HOST);
	assert(entry->nvh_dsize >= 1);
	text = NVH_DATA(entry);
	assert(text[entry->nvh_dsize - 1] == '\0');
	assert(strlen(text) == entry->nvh_dsize - 1);

	return (text);
}
/*
 * Dump content of the nv structure to stdout, one line per entry in the
 * form " name(type): value[ value...]".
 *
 * The structure is validated first; if validation fails only
 * "error: <errno>" is printed.  Entries still in network byte order are
 * converted on the fly for printing (the stored data is not modified).
 *
 * Signed values are converted back to their signed type before being
 * widened to intmax_t: the le*toh() conversions yield unsigned values, so
 * widening their result directly would print negative numbers as large
 * positive ones.
 */
void
nv_dump(struct nv *nv)
{
	struct nvhdr *nvh;
	unsigned char *data, *ptr;
	size_t dsize, size;
	unsigned int ii;
	bool swap;

	if (nv_validate(nv, NULL) < 0) {
		printf("error: %d\n", errno);
		return;
	}

	NV_CHECK(nv);
	assert(nv->nv_error == 0);

	ptr = ebuf_data(nv->nv_ebuf, &size);
	while (size > 0) {
		assert(size >= sizeof(*nvh) + 2);
		nvh = (struct nvhdr *)ptr;
		assert(size >= NVH_SIZE(nvh));
		/* Data of entries in network order needs le*toh() to print. */
		swap = ((nvh->nvh_type & NV_ORDER_MASK) == NV_ORDER_NETWORK);
		dsize = NVH_DSIZE(nvh);
		data = NVH_DATA(nvh);
		printf(" %s", nvh->nvh_name);
		switch (nvh->nvh_type & NV_TYPE_MASK) {
		case NV_TYPE_INT8:
			printf("(int8): %jd", (intmax_t)(*(int8_t *)data));
			break;
		case NV_TYPE_UINT8:
			printf("(uint8): %ju", (uintmax_t)(*(uint8_t *)data));
			break;
		case NV_TYPE_INT16:
			printf("(int16): %jd", swap ?
			    (intmax_t)(int16_t)le16toh(*(uint16_t *)(void *)data) :
			    (intmax_t)*(int16_t *)(void *)data);
			break;
		case NV_TYPE_UINT16:
			printf("(uint16): %ju", swap ?
			    (uintmax_t)le16toh(*(uint16_t *)(void *)data) :
			    (uintmax_t)*(uint16_t *)(void *)data);
			break;
		case NV_TYPE_INT32:
			printf("(int32): %jd", swap ?
			    (intmax_t)(int32_t)le32toh(*(uint32_t *)(void *)data) :
			    (intmax_t)*(int32_t *)(void *)data);
			break;
		case NV_TYPE_UINT32:
			printf("(uint32): %ju", swap ?
			    (uintmax_t)le32toh(*(uint32_t *)(void *)data) :
			    (uintmax_t)*(uint32_t *)(void *)data);
			break;
		case NV_TYPE_INT64:
			printf("(int64): %jd", swap ?
			    (intmax_t)(int64_t)le64toh(*(uint64_t *)(void *)data) :
			    (intmax_t)*(int64_t *)(void *)data);
			break;
		case NV_TYPE_UINT64:
			printf("(uint64): %ju", swap ?
			    (uintmax_t)le64toh(*(uint64_t *)(void *)data) :
			    (uintmax_t)*(uint64_t *)(void *)data);
			break;
		case NV_TYPE_INT8_ARRAY:
			printf("(int8 array):");
			for (ii = 0; ii < dsize; ii++)
				printf(" %jd", (intmax_t)((int8_t *)data)[ii]);
			break;
		case NV_TYPE_UINT8_ARRAY:
			printf("(uint8 array):");
			for (ii = 0; ii < dsize; ii++)
				printf(" %ju", (uintmax_t)((uint8_t *)data)[ii]);
			break;
		case NV_TYPE_INT16_ARRAY:
			printf("(int16 array):");
			for (ii = 0; ii < dsize / 2; ii++) {
				printf(" %jd", swap ?
				    (intmax_t)(int16_t)le16toh(((uint16_t *)(void *)data)[ii]) :
				    (intmax_t)((int16_t *)(void *)data)[ii]);
			}
			break;
		case NV_TYPE_UINT16_ARRAY:
			printf("(uint16 array):");
			for (ii = 0; ii < dsize / 2; ii++) {
				printf(" %ju", swap ?
				    (uintmax_t)le16toh(((uint16_t *)(void *)data)[ii]) :
				    (uintmax_t)((uint16_t *)(void *)data)[ii]);
			}
			break;
		case NV_TYPE_INT32_ARRAY:
			printf("(int32 array):");
			for (ii = 0; ii < dsize / 4; ii++) {
				printf(" %jd", swap ?
				    (intmax_t)(int32_t)le32toh(((uint32_t *)(void *)data)[ii]) :
				    (intmax_t)((int32_t *)(void *)data)[ii]);
			}
			break;
		case NV_TYPE_UINT32_ARRAY:
			printf("(uint32 array):");
			for (ii = 0; ii < dsize / 4; ii++) {
				printf(" %ju", swap ?
				    (uintmax_t)le32toh(((uint32_t *)(void *)data)[ii]) :
				    (uintmax_t)((uint32_t *)(void *)data)[ii]);
			}
			break;
		case NV_TYPE_INT64_ARRAY:
			/* Signed elements: print with %jd, not %ju. */
			printf("(int64 array):");
			for (ii = 0; ii < dsize / 8; ii++) {
				printf(" %jd", swap ?
				    (intmax_t)(int64_t)le64toh(((uint64_t *)(void *)data)[ii]) :
				    (intmax_t)((int64_t *)(void *)data)[ii]);
			}
			break;
		case NV_TYPE_UINT64_ARRAY:
			printf("(uint64 array):");
			for (ii = 0; ii < dsize / 8; ii++) {
				printf(" %ju", swap ?
				    (uintmax_t)le64toh(((uint64_t *)(void *)data)[ii]) :
				    (uintmax_t)((uint64_t *)(void *)data)[ii]);
			}
			break;
		case NV_TYPE_STRING:
			printf("(string): %s", (char *)data);
			break;
		default:
			assert(!"invalid condition");
		}
		printf("\n");
		ptr += NVH_SIZE(nvh);
		size -= NVH_SIZE(nvh);
	}
}
/*
* Local routines below.
*/
/*
 * Append one entry (header, data and alignment padding) to the nv
 * structure's ebuf.
 *
 * nv     - target structure; when NULL only errno is set to ENOMEM.
 * value  - vsize bytes of entry data.
 * type   - one of the NV_TYPE_* constants; stored with NV_ORDER_HOST set.
 * name   - NUL-terminated entry name.
 *
 * Nothing is returned.  The first failure is recorded in nv->nv_error
 * (later failures never overwrite an earlier one).
 */
static void
nv_add(struct nv *nv, const unsigned char *value, size_t vsize, int type,
    const char *name)
{
	static unsigned char align[7];
	struct nvhdr *nvh;
	size_t namesize;

	if (nv == NULL) {
		errno = ENOMEM;
		return;
	}

	NV_CHECK(nv);

	namesize = strlen(name) + 1;

	/*
	 * The header is only a staging copy.  It is zero-filled so that the
	 * bytes between the end of the name and the 8-byte rounded size do
	 * not carry stale heap contents.
	 * NOTE(review): assumes NVH_HSIZE() covers that rounded size, as
	 * the allocation size suggests -- confirm against its definition.
	 */
	nvh = calloc(1, sizeof(*nvh) + roundup2(namesize, 8));
	if (nvh == NULL) {
		if (nv->nv_error == 0)
			nv->nv_error = ENOMEM;
		return;
	}
	nvh->nvh_type = NV_ORDER_HOST | type;
	nvh->nvh_namesize = (uint8_t)namesize;
	nvh->nvh_dsize = (uint32_t)vsize;
	bcopy(name, nvh->nvh_name, namesize);

	/*
	 * Add header first.  The staging copy is released on both the
	 * failure and the success path; previously it was never freed.
	 * NOTE(review): this relies on ebuf_add_tail() copying its input,
	 * as it must for the caller-owned value and the static align[]
	 * passed to it below -- confirm.
	 */
	if (ebuf_add_tail(nv->nv_ebuf, nvh, NVH_HSIZE(nvh)) < 0) {
		assert(errno != 0);
		if (nv->nv_error == 0)
			nv->nv_error = errno;
		free(nvh);
		return;
	}
	free(nvh);

	/* Add the actual data. */
	if (ebuf_add_tail(nv->nv_ebuf, value, vsize) < 0) {
		assert(errno != 0);
		if (nv->nv_error == 0)
			nv->nv_error = errno;
		return;
	}

	/* Align the data (if needed) to a multiple of 8 bytes. */
	vsize = roundup2(vsize, 8) - vsize;
	if (vsize == 0)
		return;
	assert(vsize > 0 && vsize <= sizeof(align));
	if (ebuf_add_tail(nv->nv_ebuf, align, vsize) < 0) {
		assert(errno != 0);
		if (nv->nv_error == 0)
			nv->nv_error = errno;
		return;
	}
}
/*
 * Format the entry name from namefmt/nameap into a local buffer and add
 * the entry with nv_add().  The formatted name must be non-empty and fit
 * in the buffer; this is only enforced by an assertion.
 */
static void
nv_addv(struct nv *nv, const unsigned char *value, size_t vsize, int type,
    const char *namefmt, va_list nameap)
{
	char namebuf[255];
	size_t len;

	len = vsnprintf(namebuf, sizeof(namebuf), namefmt, nameap);
	assert(len > 0 && len < sizeof(namebuf));

	nv_add(nv, value, vsize, type, namebuf);
}
/*
 * Find the entry whose name equals the formatted namefmt/nameap string.
 *
 * Every entry visited during the search is converted to host byte order.
 * Returns the entry header on success.  Returns NULL and sets errno to
 * ENOMEM when nv is NULL, to EINVAL when the name matches but the type
 * does not, or to ENOENT when no entry has that name; in the last two
 * cases the error is also recorded in nv->nv_error unless one is already
 * stored there.
 */
static struct nvhdr *
nv_find(struct nv *nv, int type, const char *namefmt, va_list nameap)
{
	char wanted[255];
	struct nvhdr *entry;
	unsigned char *cursor;
	size_t left, wantedlen;

	if (nv == NULL) {
		errno = ENOMEM;
		return (NULL);
	}

	NV_CHECK(nv);

	wantedlen = vsnprintf(wanted, sizeof(wanted), namefmt, nameap);
	assert(wantedlen > 0 && wantedlen < sizeof(wanted));

	for (cursor = ebuf_data(nv->nv_ebuf, &left); left > 0;
	    cursor += NVH_SIZE(entry), left -= NVH_SIZE(entry)) {
		assert(left >= sizeof(*entry) + 2);
		entry = (struct nvhdr *)cursor;
		assert(left >= NVH_SIZE(entry));
		nv_swap(entry, true);
		if (strcmp(entry->nvh_name, wanted) != 0)
			continue;
		if ((entry->nvh_type & NV_TYPE_MASK) == type)
			return (entry);
		/* Right name, wrong type. */
		errno = EINVAL;
		if (nv->nv_error == 0)
			nv->nv_error = EINVAL;
		return (NULL);
	}

	errno = ENOENT;
	if (nv->nv_error == 0)
		nv->nv_error = ENOENT;
	return (NULL);
}
/*
 * Convert one entry between host and network (little-endian) byte order.
 *
 * tohost == true converts to host order, false to network order.  The
 * call is a no-op when the entry is already marked as being in the
 * requested order.  The data size field and the order flag in the header
 * are always updated; the data itself is converted element by element for
 * 16, 32 and 64 bit types (scalars and arrays).  8-bit types and strings
 * need no data conversion.
 */
static void
nv_swap(struct nvhdr *nvh, bool tohost)
{
	unsigned char *cur, *first, *last;
	size_t width;

	first = NVH_DATA(nvh);
	if (tohost) {
		if ((nvh->nvh_type & NV_ORDER_MASK) == NV_ORDER_HOST)
			return;
		/* The size must be converted before it can be used. */
		nvh->nvh_dsize = le32toh(nvh->nvh_dsize);
		last = first + nvh->nvh_dsize;
		nvh->nvh_type &= ~NV_ORDER_MASK;
		nvh->nvh_type |= NV_ORDER_HOST;
	} else {
		if ((nvh->nvh_type & NV_ORDER_MASK) == NV_ORDER_NETWORK)
			return;
		/* The size must be used before it is converted. */
		last = first + nvh->nvh_dsize;
		nvh->nvh_dsize = htole32(nvh->nvh_dsize);
		nvh->nvh_type &= ~NV_ORDER_MASK;
		nvh->nvh_type |= NV_ORDER_NETWORK;
	}

	/* Pick the element width; types without one are done already. */
	switch (nvh->nvh_type & NV_TYPE_MASK) {
	case NV_TYPE_INT8:
	case NV_TYPE_UINT8:
	case NV_TYPE_INT8_ARRAY:
	case NV_TYPE_UINT8_ARRAY:
	case NV_TYPE_STRING:
		return;
	case NV_TYPE_INT16:
	case NV_TYPE_UINT16:
	case NV_TYPE_INT16_ARRAY:
	case NV_TYPE_UINT16_ARRAY:
		width = 2;
		break;
	case NV_TYPE_INT32:
	case NV_TYPE_UINT32:
	case NV_TYPE_INT32_ARRAY:
	case NV_TYPE_UINT32_ARRAY:
		width = 4;
		break;
	case NV_TYPE_INT64:
	case NV_TYPE_UINT64:
	case NV_TYPE_INT64_ARRAY:
	case NV_TYPE_UINT64_ARRAY:
		width = 8;
		break;
	default:
		assert(!"unrecognized type");
		return;
	}

	for (cur = first; cur < last; cur += width) {
		switch (width) {
		case 2:
			*(uint16_t *)(void *)cur = tohost ?
			    le16toh(*(uint16_t *)(void *)cur) :
			    htole16(*(uint16_t *)(void *)cur);
			break;
		case 4:
			*(uint32_t *)(void *)cur = tohost ?
			    le32toh(*(uint32_t *)(void *)cur) :
			    htole32(*(uint32_t *)(void *)cur);
			break;
		case 8:
			*(uint64_t *)(void *)cur = tohost ?
			    le64toh(*(uint64_t *)(void *)cur) :
			    htole64(*(uint64_t *)(void *)cur);
			break;
		default:
			assert(!"invalid condition");
		}
	}
}

158
sbin/hastd/nv.h Normal file
View File

@ -0,0 +1,158 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _NV_H_
#define _NV_H_
#include <sys/cdefs.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <ebuf.h>
/*
 * Entry type codes.  The type shares one header field with the byte-order
 * flag below; NV_TYPE_MASK extracts the type part.
 */
#define NV_TYPE_INT8 1
#define NV_TYPE_UINT8 2
#define NV_TYPE_INT16 3
#define NV_TYPE_UINT16 4
#define NV_TYPE_INT32 5
#define NV_TYPE_UINT32 6
#define NV_TYPE_INT64 7
#define NV_TYPE_UINT64 8
#define NV_TYPE_INT8_ARRAY 9
#define NV_TYPE_UINT8_ARRAY 10
#define NV_TYPE_INT16_ARRAY 11
#define NV_TYPE_UINT16_ARRAY 12
#define NV_TYPE_INT32_ARRAY 13
#define NV_TYPE_UINT32_ARRAY 14
#define NV_TYPE_INT64_ARRAY 15
#define NV_TYPE_UINT64_ARRAY 16
#define NV_TYPE_STRING 17
#define NV_TYPE_MASK 0x7f
/* Inclusive range of valid type codes. */
#define NV_TYPE_FIRST NV_TYPE_INT8
#define NV_TYPE_LAST NV_TYPE_STRING
/*
 * Byte-order flag kept in the top bit of the type field: it tells whether
 * the entry is currently in network or in host byte order.
 */
#define NV_ORDER_NETWORK 0x00
#define NV_ORDER_HOST 0x80
#define NV_ORDER_MASK 0x80
/* Opaque handle; the structure layout is not exposed by this header. */
struct nv;
/*
 * NOTE(review): the definitions of nv_alloc(), nv_free(), nv_error() and
 * nv_set_error() are not visible from here; presumably they allocate and
 * release a structure and read/set its stored error -- confirm in nv.c.
 */
struct nv *nv_alloc(void);
void nv_free(struct nv *nv);
int nv_error(const struct nv *nv);
int nv_set_error(struct nv *nv, int error);
/*
 * Check the structure's contents.  Returns 0 on success and -1 (with errno
 * set) on failure; when extrap is not NULL it receives a byte count on
 * success.
 */
int nv_validate(struct nv *nv, size_t *extrap);
/* Convert all entries to network byte order and return the ebuf. */
struct ebuf *nv_hton(struct nv *nv);
/* Build an nv structure around an ebuf received from the network. */
struct nv *nv_ntoh(struct ebuf *eb);
/*
 * Add a scalar value.  The entry name is built printf-style from namefmt
 * and the arguments that follow it.
 */
void nv_add_int8(struct nv *nv, int8_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_uint8(struct nv *nv, uint8_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_int16(struct nv *nv, int16_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_uint16(struct nv *nv, uint16_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_int32(struct nv *nv, int32_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_uint32(struct nv *nv, uint32_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_int64(struct nv *nv, int64_t value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_uint64(struct nv *nv, uint64_t value, const char *namefmt, ...)
__printflike(3, 4);
/*
 * Add an array; size is the number of elements, not bytes.  The entry name
 * is built printf-style from namefmt.
 */
void nv_add_int8_array(struct nv *nv, const int8_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_uint8_array(struct nv *nv, const uint8_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_int16_array(struct nv *nv, const int16_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_uint16_array(struct nv *nv, const uint16_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_int32_array(struct nv *nv, const int32_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_uint32_array(struct nv *nv, const uint32_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_int64_array(struct nv *nv, const int64_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
void nv_add_uint64_array(struct nv *nv, const uint64_t *value, size_t size,
const char *namefmt, ...) __printflike(4, 5);
/*
 * Add a string.  nv_add_string() formats the NAME; nv_add_stringf() and
 * nv_add_stringv() take a literal name and format the VALUE instead.
 */
void nv_add_string(struct nv *nv, const char *value, const char *namefmt, ...)
__printflike(3, 4);
void nv_add_stringf(struct nv *nv, const char *name, const char *valuefmt, ...)
__printflike(3, 4);
void nv_add_stringv(struct nv *nv, const char *name, const char *valuefmt,
va_list valueap) __printflike(3, 0);
/*
 * Fetch a scalar by its printf-style formatted name.  0 is returned when
 * the entry cannot be found, so a failed lookup cannot be told apart from
 * a stored 0 by the return value alone.
 */
int8_t nv_get_int8(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
uint8_t nv_get_uint8(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
int16_t nv_get_int16(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
uint16_t nv_get_uint16(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
int32_t nv_get_int32(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
uint32_t nv_get_uint32(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
int64_t nv_get_int64(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
uint64_t nv_get_uint64(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
/*
 * Fetch an array by its formatted name.  Returns NULL when the entry cannot
 * be found; otherwise the pointer refers to data held inside the nv
 * structure and *sizep (if sizep is not NULL) receives the element count.
 */
const int8_t *nv_get_int8_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const uint8_t *nv_get_uint8_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const int16_t *nv_get_int16_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const uint16_t *nv_get_uint16_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const int32_t *nv_get_int32_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const uint32_t *nv_get_uint32_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const int64_t *nv_get_int64_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
const uint64_t *nv_get_uint64_array(struct nv *nv, size_t *sizep,
const char *namefmt, ...) __printflike(3, 4);
/*
 * Fetch a string by its formatted name; NULL when it cannot be found.  The
 * returned pointer refers to data held inside the nv structure.
 */
const char *nv_get_string(struct nv *nv, const char *namefmt, ...)
__printflike(2, 3);
/* Print every entry to stdout. */
void nv_dump(struct nv *nv);
#endif /* !_NV_H_ */

507
sbin/hastd/parse.y Normal file
View File

@ -0,0 +1,507 @@
%{
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/param.h> /* MAXHOSTNAMELEN */
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <arpa/inet.h>
#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include "hast.h"
/*
 * State defined outside this file (presumably by the lexer -- confirm):
 * current brace nesting depth, current line number, input stream and the
 * text of the current token.
 */
extern int depth;
extern int lineno;
extern FILE *yyin;
extern char *yytext;
/* Configuration being built; a pointer to it is returned to the caller. */
static struct hastd_config lconfig;
/* Resource currently being parsed, or NULL outside a resource section. */
static struct hast_resource *curres;
/* True while parsing an "on <node>" section that names this host. */
static bool mynode;
/* Settings given at the top level (depth 0) of the configuration file. */
static char depth0_control[HAST_ADDRSIZE];
static char depth0_listen[HAST_ADDRSIZE];
static int depth0_replication;
/* Settings given directly inside a resource section (depth 1). */
static char depth1_provname[PATH_MAX];
static char depth1_localpath[PATH_MAX];
/*
 * Tell whether the given node name refers to the local host.
 *
 * The name matches if it equals the full host name, the part of the host
 * name before the first dot, or (case-insensitively) the kern.hostuuid
 * sysctl value.  Failure to obtain the host name or the UUID terminates
 * the program via err(3).
 */
static bool
isitme(const char *name)
{
	char buf[MAXHOSTNAMELEN];
	char *pos;
	size_t bufsize, shortlen;

	/*
	 * First check if the given name matches our full hostname.
	 */
	if (gethostname(buf, sizeof(buf)) < 0)
		err(EX_OSERR, "gethostname() failed");
	/* A truncated host name is not guaranteed to be terminated. */
	buf[sizeof(buf) - 1] = '\0';
	if (strcmp(buf, name) == 0)
		return (true);

	/*
	 * Now check if it matches first part of the host name.  The lengths
	 * have to agree as well: comparing only the first (pos - buf) bytes
	 * would also accept any name that merely starts with our short host
	 * name (e.g. "foobar" on host "foo.example.com").
	 */
	pos = strchr(buf, '.');
	if (pos != NULL && pos != buf) {
		shortlen = (size_t)(pos - buf);
		if (strlen(name) == shortlen &&
		    strncmp(buf, name, shortlen) == 0)
			return (true);
	}

	/*
	 * At the end check if name is equal to our host's UUID.
	 */
	bufsize = sizeof(buf);
	if (sysctlbyname("kern.hostuuid", buf, &bufsize, NULL, 0) < 0)
		err(EX_OSERR, "sysctlbyname(kern.hostuuid) failed");
	if (strcasecmp(buf, name) == 0)
		return (true);

	/*
	 * Looks like this isn't about us.
	 */
	return (false);
}
void
yyerror(const char *str)
{
fprintf(stderr, "error at line %d near '%s': %s\n",
lineno, yytext, str);
}
struct hastd_config *
yy_config_parse(const char *config)
{
int ret;
curres = NULL;
mynode = false;
depth0_replication = HAST_REPLICATION_MEMSYNC;
strlcpy(depth0_control, HAST_CONTROL, sizeof(depth0_control));
strlcpy(depth0_listen, HASTD_LISTEN, sizeof(depth0_listen));
TAILQ_INIT(&lconfig.hc_resources);
yyin = fopen(config, "r");
if (yyin == NULL)
err(EX_OSFILE, "cannot open configuration file %s", config);
ret = yyparse();
fclose(yyin);
if (ret != 0) {
yy_config_free(&lconfig);
exit(EX_CONFIG);
}
/*
* Let's see if everything is set up.
*/
if (lconfig.hc_controladdr[0] == '\0') {
strlcpy(lconfig.hc_controladdr, depth0_control,
sizeof(lconfig.hc_controladdr));
}
if (lconfig.hc_listenaddr[0] == '\0') {
strlcpy(lconfig.hc_listenaddr, depth0_listen,
sizeof(lconfig.hc_listenaddr));
}
TAILQ_FOREACH(curres, &lconfig.hc_resources, hr_next) {
assert(curres->hr_provname[0] != '\0');
assert(curres->hr_localpath[0] != '\0');
assert(curres->hr_remoteaddr[0] != '\0');
if (curres->hr_replication == -1) {
/*
* Replication is not set at resource-level.
* Use global or default setting.
*/
curres->hr_replication = depth0_replication;
}
}
return (&lconfig);
}
/*
 * Release every resource on the configuration's resource list.  The
 * configuration structure itself is not freed.
 */
void
yy_config_free(struct hastd_config *config)
{
	struct hast_resource *head;

	for (;;) {
		head = TAILQ_FIRST(&config->hc_resources);
		if (head == NULL)
			break;
		TAILQ_REMOVE(&config->hc_resources, head, hr_next);
		free(head);
	}
}
%}
/*
 * Keyword tokens.  PORT and EXTENTSIZE are declared but not referenced by
 * any rule in this grammar.
 */
%token CONTROL LISTEN PORT REPLICATION EXTENTSIZE RESOURCE NAME LOCAL REMOTE ON
/* Replication mode keywords. */
%token FULLSYNC MEMSYNC ASYNC
/* Number and string values; OB/CB open and close a section. */
%token NUM STR OB CB
/* replication_type yields one of the HAST_REPLICATION_* values. */
%type <num> replication_type
/* Semantic value of a token or rule. */
%union
{
int num;
char *str;
}
/* Semantic value types of the NUM and STR tokens. */
%token <num> NUM
%token <str> STR
%%
/* A configuration file is a possibly empty sequence of statements. */
statements:
|
statements statement
;
/* Statements accepted at the top level of the configuration file. */
statement:
control_statement
|
listen_statement
|
replication_statement
|
node_statement
|
resource_statement
;
/*
 * "control <address>".  At depth 0 the address becomes the global default;
 * at depth 1 (inside a node section) it is stored in the configuration
 * only when that section names this host.  An address that does not fit
 * terminates the program.
 */
control_statement: CONTROL STR
{
switch (depth) {
case 0:
if (strlcpy(depth0_control, $2,
sizeof(depth0_control)) >=
sizeof(depth0_control)) {
errx(EX_CONFIG, "control argument too long");
}
break;
case 1:
if (mynode) {
if (strlcpy(lconfig.hc_controladdr, $2,
sizeof(lconfig.hc_controladdr)) >=
sizeof(lconfig.hc_controladdr)) {
errx(EX_CONFIG,
"control argument too long");
}
}
break;
default:
assert(!"control at wrong depth level");
}
}
;
/*
 * "listen <address>".  At depth 0 the address becomes the global default;
 * at depth 1 (inside a node section) it is stored in the configuration
 * only when that section names this host.  An address that does not fit
 * terminates the program.
 */
listen_statement: LISTEN STR
{
switch (depth) {
case 0:
if (strlcpy(depth0_listen, $2,
sizeof(depth0_listen)) >=
sizeof(depth0_listen)) {
errx(EX_CONFIG, "listen argument too long");
}
break;
case 1:
if (mynode) {
if (strlcpy(lconfig.hc_listenaddr, $2,
sizeof(lconfig.hc_listenaddr)) >=
sizeof(lconfig.hc_listenaddr)) {
errx(EX_CONFIG,
"listen argument too long");
}
}
break;
default:
assert(!"listen at wrong depth level");
}
}
;
/*
 * "replication <mode>".  At depth 0 the mode becomes the global default;
 * at depth 1 it is stored in the resource currently being parsed, if any.
 */
replication_statement: REPLICATION replication_type
{
switch (depth) {
case 0:
depth0_replication = $2;
break;
case 1:
if (curres != NULL)
curres->hr_replication = $2;
break;
default:
assert(!"replication at wrong depth level");
}
}
;
/* Map a replication mode keyword to its HAST_REPLICATION_* value. */
replication_type:
FULLSYNC { $$ = HAST_REPLICATION_FULLSYNC; }
|
MEMSYNC { $$ = HAST_REPLICATION_MEMSYNC; }
|
ASYNC { $$ = HAST_REPLICATION_ASYNC; }
;
/*
 * Top-level "on <node> { ... }" section.  mynode is cleared again once the
 * whole section has been parsed.
 */
node_statement: ON node_start OB node_entries CB
{
mynode = false;
}
;
/* Node name of a top-level section: note whether it names this host. */
node_start: STR
{
if (isitme($1))
mynode = true;
}
;
/* Possibly empty list of entries inside a top-level node section. */
node_entries:
|
node_entries node_entry
;
/* Only control and listen addresses may be set per node. */
node_entry:
control_statement
|
listen_statement
;
/*
 * "resource <name> { ... }".  Runs once the whole section has been parsed:
 * completes the resource with resource-level fallbacks, checks that the
 * mandatory settings are present (terminating the program otherwise) and
 * appends the resource to the configuration's list.
 */
resource_statement: RESOURCE resource_start OB resource_entries CB
{
if (curres != NULL) {
/*
 * Let's see if there are some resource-level settings
 * that we can use for node-level settings.
 */
if (curres->hr_provname[0] == '\0' &&
depth1_provname[0] != '\0') {
/*
 * Provider name is not set at node-level,
 * but is set at resource-level, use it.
 */
strlcpy(curres->hr_provname, depth1_provname,
sizeof(curres->hr_provname));
}
if (curres->hr_localpath[0] == '\0' &&
depth1_localpath[0] != '\0') {
/*
 * Path to local provider is not set at
 * node-level, but is set at resource-level,
 * use it.
 */
strlcpy(curres->hr_localpath, depth1_localpath,
sizeof(curres->hr_localpath));
}
/*
 * If provider name is not given, use resource name
 * as provider name.
 */
if (curres->hr_provname[0] == '\0') {
strlcpy(curres->hr_provname, curres->hr_name,
sizeof(curres->hr_provname));
}
/*
 * Remote address has to be configured at this point.
 */
if (curres->hr_remoteaddr[0] == '\0') {
errx(EX_CONFIG,
"remote address not configured for resource %s",
curres->hr_name);
}
/*
 * Path to local provider has to be configured at this
 * point.
 */
if (curres->hr_localpath[0] == '\0') {
errx(EX_CONFIG,
"path local component not configured for resource %s",
curres->hr_name);
}
/* Put it onto resource list. */
TAILQ_INSERT_TAIL(&lconfig.hc_resources, curres, hr_next);
/* The list owns the resource now. */
curres = NULL;
}
}
;
/*
 * Resource name: start a new resource.  Allocates a zeroed resource,
 * stores its name and marks every other setting as "not configured yet".
 * Allocation failure or an over-long name terminates the program.
 */
resource_start: STR
{
/*
 * Clear those, so we can tell if they were set at
 * resource-level or not.
 */
depth1_provname[0] = '\0';
depth1_localpath[0] = '\0';
curres = calloc(1, sizeof(*curres));
if (curres == NULL) {
errx(EX_TEMPFAIL,
"cannot allocate memory for resource");
}
if (strlcpy(curres->hr_name, $1,
sizeof(curres->hr_name)) >=
sizeof(curres->hr_name)) {
errx(EX_CONFIG,
"resource name (%s) too long", $1);
}
curres->hr_role = HAST_ROLE_INIT;
curres->hr_previous_role = HAST_ROLE_INIT;
/* -1 means: fall back to the global replication setting. */
curres->hr_replication = -1;
curres->hr_provname[0] = '\0';
curres->hr_localpath[0] = '\0';
curres->hr_localfd = -1;
curres->hr_remoteaddr[0] = '\0';
curres->hr_ggateunit = -1;
}
;
/* Possibly empty list of entries inside a resource section. */
resource_entries:
|
resource_entries resource_entry
;
/* Settings and per-node subsections allowed inside a resource section. */
resource_entry:
replication_statement
|
name_statement
|
local_statement
|
resource_node_statement
;
/*
 * "name <provider name>".  At depth 1 (resource level) the name is kept as
 * a fallback; at depth 2 (node level inside a resource) it is stored in
 * the resource only when the node section names this host.  A name that
 * does not fit terminates the program.
 */
name_statement: NAME STR
{
switch (depth) {
case 1:
if (strlcpy(depth1_provname, $2,
sizeof(depth1_provname)) >=
sizeof(depth1_provname)) {
errx(EX_CONFIG, "name argument too long");
}
break;
case 2:
if (mynode) {
assert(curres != NULL);
if (strlcpy(curres->hr_provname, $2,
sizeof(curres->hr_provname)) >=
sizeof(curres->hr_provname)) {
errx(EX_CONFIG,
"name argument too long");
}
}
break;
default:
assert(!"name at wrong depth level");
}
}
;
/*
 * "local <path>".  At depth 1 (resource level) the path is kept as a
 * fallback; at depth 2 (node level inside a resource) it is stored in the
 * resource only when the node section names this host.  A path that does
 * not fit terminates the program.
 */
local_statement: LOCAL STR
{
switch (depth) {
case 1:
if (strlcpy(depth1_localpath, $2,
sizeof(depth1_localpath)) >=
sizeof(depth1_localpath)) {
errx(EX_CONFIG, "local argument too long");
}
break;
case 2:
if (mynode) {
assert(curres != NULL);
if (strlcpy(curres->hr_localpath, $2,
sizeof(curres->hr_localpath)) >=
sizeof(curres->hr_localpath)) {
errx(EX_CONFIG,
"local argument too long");
}
}
break;
default:
assert(!"local at wrong depth level");
}
}
;
/*
 * "on <node> { ... }" inside a resource section.  mynode is cleared again
 * once the whole section has been parsed.
 */
resource_node_statement:ON resource_node_start OB resource_node_entries CB
{
mynode = false;
}
;
/*
 * Node name inside a resource: note whether it names this host.  Nothing
 * is checked when there is no current resource.
 */
resource_node_start: STR
{
if (curres != NULL && isitme($1))
mynode = true;
}
;
/* Possibly empty list of entries inside a per-node resource section. */
resource_node_entries:
|
resource_node_entries resource_node_entry
;
/* Settings allowed per node inside a resource section. */
resource_node_entry:
name_statement
|
local_statement
|
remote_statement
;
/*
 * "remote <address>", valid only at depth 2.  The address is stored in the
 * current resource only when the enclosing node section names this host.
 * An address that does not fit terminates the program.
 */
remote_statement: REMOTE STR
{
assert(depth == 2);
if (mynode) {
assert(curres != NULL);
if (strlcpy(curres->hr_remoteaddr, $2,
sizeof(curres->hr_remoteaddr)) >=
sizeof(curres->hr_remoteaddr)) {
errx(EX_CONFIG, "remote argument too long");
}
}
}
;

367
sbin/hastd/pjdlog.c Normal file
View File

@ -0,0 +1,367 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <assert.h>
#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include "pjdlog.h"
/* Where logs go; standard streams until changed by pjdlog_mode_set(). */
static int pjdlog_mode = PJDLOG_MODE_STD;
/* Debug logs with a level above this value are dropped. */
static int pjdlog_debug_level = 0;
/* Text put in front of every log; empty until a prefix is set. */
static char pjdlog_prefix[128];
/*
 * Configure where the logs should go.
 * By default they are sent to stdout/stderr, but after going into the
 * background (e.g. by calling daemon(3)) the application is responsible for
 * changing the mode to PJDLOG_MODE_SYSLOG, so that logs are sent to syslog.
 * Only PJDLOG_MODE_STD and PJDLOG_MODE_SYSLOG are accepted (asserted).
 */
void
pjdlog_mode_set(int mode)
{
assert(mode == PJDLOG_MODE_STD || mode == PJDLOG_MODE_SYSLOG);
pjdlog_mode = mode;
}
/*
 * Return the current log mode (the value last stored by pjdlog_mode_set(),
 * or PJDLOG_MODE_STD if it was never called).
 */
int
pjdlog_mode_get(void)
{
return (pjdlog_mode);
}
/*
 * Set the debug level. Debug logs with a level above the one specified here
 * will be ignored. The level must not be negative (asserted).
 */
void
pjdlog_debug_set(int level)
{
assert(level >= 0);
pjdlog_debug_level = level;
}
/*
 * Return the current debug level (0 unless changed by pjdlog_debug_set()).
 */
int
pjdlog_debug_get(void)
{
return (pjdlog_debug_level);
}
/*
 * Set the prefix that will be put before each log.
 * The prefix is built printf-style from fmt and the arguments that follow;
 * the actual work is done by pjdlog_prefix_setv().
 */
void
pjdlog_prefix_set(const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pjdlog_prefix_setv(fmt, args);
	va_end(args);
}
/*
 * Set the prefix that will be put before each log.
 *
 * The prefix is built from fmt and ap; output that does not fit into the
 * prefix buffer is truncated.  Setting the prefix to NULL removes it: the
 * stored prefix becomes the empty string and ap is not used.
 */
void
pjdlog_prefix_setv(const char *fmt, va_list ap)
{
	if (fmt == NULL) {
		/* Honour the documented "NULL removes the prefix". */
		pjdlog_prefix[0] = '\0';
		return;
	}

	vsnprintf(pjdlog_prefix, sizeof(pjdlog_prefix), fmt, ap);
}
/*
 * Convert a syslog(3) log level into its printable name.
 * A value that is not one of the eight syslog levels is a programming
 * error: it trips an assertion and then aborts.
 */
static const char *
pjdlog_level_string(int loglevel)
{
	static const char *const names[] = {
		[LOG_EMERG] = "EMERG",
		[LOG_ALERT] = "ALERT",
		[LOG_CRIT] = "CRIT",
		[LOG_ERR] = "ERROR",
		[LOG_WARNING] = "WARNING",
		[LOG_NOTICE] = "NOTICE",
		[LOG_INFO] = "INFO",
		[LOG_DEBUG] = "DEBUG",
	};

	if (loglevel >= 0 &&
	    (size_t)loglevel < sizeof(names) / sizeof(names[0]) &&
	    names[loglevel] != NULL)
		return (names[loglevel]);

	assert(!"Invalid log level.");
	abort();	/* XXX: gcc */
}
/*
 * Common log routine (variadic front end).
 * Collects the variable arguments and hands everything to pjdlogv_common(),
 * which documents the meaning of loglevel, debuglevel and error.
 */
void
pjdlog_common(int loglevel, int debuglevel, int error, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
pjdlogv_common(loglevel, debuglevel, error, fmt, ap);
va_end(ap);
}
/*
 * Common log routine, which can handle regular log levels as well as the
 * debug level. We decide here where to send the logs (stdout/stderr or
 * syslog).
 *
 * loglevel   - one of the syslog priorities LOG_EMERG ... LOG_DEBUG.
 * debuglevel - must be > 0 when loglevel is LOG_DEBUG; debug messages with
 *              a level above the configured debug level are dropped.
 * error      - errno value whose description is appended as ": <text>.",
 *              or -1 to append nothing.
 * fmt, ap    - printf-style message.
 */
void
pjdlogv_common(int loglevel, int debuglevel, int error, const char *fmt,
    va_list ap)
{

	assert(loglevel == LOG_EMERG || loglevel == LOG_ALERT ||
	    loglevel == LOG_CRIT || loglevel == LOG_ERR ||
	    loglevel == LOG_WARNING || loglevel == LOG_NOTICE ||
	    loglevel == LOG_INFO || loglevel == LOG_DEBUG);
	assert(loglevel != LOG_DEBUG || debuglevel > 0);
	assert(error >= -1);

	/* Ignore debug above configured level. */
	if (loglevel == LOG_DEBUG && debuglevel > pjdlog_debug_level)
		return;

	switch (pjdlog_mode) {
	case PJDLOG_MODE_STD:
	    {
		FILE *out;

		/*
		 * We send errors and warnings to stderr and the rest to
		 * stdout.
		 */
		switch (loglevel) {
		case LOG_EMERG:
		case LOG_ALERT:
		case LOG_CRIT:
		case LOG_ERR:
		case LOG_WARNING:
			out = stderr;
			break;
		case LOG_NOTICE:
		case LOG_INFO:
		case LOG_DEBUG:
			out = stdout;
			break;
		default:
			assert(!"Invalid loglevel.");
			abort();	/* XXX: gcc */
		}

		fprintf(out, "[%s]", pjdlog_level_string(loglevel));
		/* Attach debuglevel if this is debug log. */
		if (loglevel == LOG_DEBUG)
			fprintf(out, "[%d]", debuglevel);
		fprintf(out, " ");
		fprintf(out, "%s", pjdlog_prefix);
		vfprintf(out, fmt, ap);
		if (error != -1)
			fprintf(out, ": %s.", strerror(error));
		fprintf(out, "\n");
		break;
	    }
	case PJDLOG_MODE_SYSLOG:
	    {
		char log[1024];
		int len, added;

		/*
		 * Build "<prefix><message>[: <error>.]" in one buffer.
		 * 'len' is the running offset into 'log', so the length of
		 * every piece has to be added to it; assigning the result
		 * of vsnprintf() directly would make the error text land on
		 * top of the message whenever a prefix is set.
		 */
		len = snprintf(log, sizeof(log), "%s", pjdlog_prefix);
		if ((size_t)len < sizeof(log)) {
			added = vsnprintf(log + len, sizeof(log) - len, fmt,
			    ap);
			/* A negative result means a formatting error. */
			if (added > 0)
				len += added;
		}
		/* Skip the error text if the buffer is already full. */
		if (error != -1 && (size_t)len < sizeof(log)) {
			(void)snprintf(log + len, sizeof(log) - len, ": %s.",
			    strerror(error));
		}
		syslog(loglevel, "%s", log);
		break;
	    }
	default:
		assert(!"Invalid mode.");
	}
}
/*
 * Regular logs (va_list variant).
 * Logs the printf-style message at the given syslog priority without any
 * errno text. loglevel may be anything from LOG_EMERG to LOG_INFO.
 */
void
pjdlogv(int loglevel, const char *fmt, va_list ap)
{
/* LOG_DEBUG is invalid here, pjdlogv?_debug() should be used. */
assert(loglevel == LOG_EMERG || loglevel == LOG_ALERT ||
loglevel == LOG_CRIT || loglevel == LOG_ERR ||
loglevel == LOG_WARNING || loglevel == LOG_NOTICE ||
loglevel == LOG_INFO);
/* Debug level 0 (unused for non-debug logs) and -1 for "no errno". */
pjdlogv_common(loglevel, 0, -1, fmt, ap);
}
/*
 * Regular logs (variadic variant of pjdlogv()).
 */
void
pjdlog(int loglevel, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
pjdlogv(loglevel, fmt, ap);
va_end(ap);
}
/*
 * Debug logs (va_list variant).
 * The message is logged at LOG_DEBUG with the given debug level, which
 * pjdlogv_common() requires to be greater than zero, and without errno text.
 */
void
pjdlogv_debug(int debuglevel, const char *fmt, va_list ap)
{
pjdlogv_common(LOG_DEBUG, debuglevel, -1, fmt, ap);
}
/*
 * Debug logs (variadic variant of pjdlogv_debug()).
 */
void
pjdlog_debug(int debuglevel, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
pjdlogv_debug(debuglevel, fmt, ap);
va_end(ap);
}
/*
 * Error logs with errno logging (va_list variant).
 * The value of errno at the time of the call is passed on as the error
 * whose description gets appended to the message.
 */
void
pjdlogv_errno(int loglevel, const char *fmt, va_list ap)
{
pjdlogv_common(loglevel, 0, errno, fmt, ap);
}
/*
 * Error logs with errno logging (variadic variant of pjdlogv_errno()).
 */
void
pjdlog_errno(int loglevel, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
pjdlogv_errno(loglevel, fmt, ap);
va_end(ap);
}
/*
 * Log error, errno and exit (va_list variant).
 * The message is logged at LOG_ERR together with the current errno text,
 * then the process exits with the given exit code.
 */
void
pjdlogv_exit(int exitcode, const char *fmt, va_list ap)
{
pjdlogv_errno(LOG_ERR, fmt, ap);
exit(exitcode);
}
/*
 * Log error, errno and exit (variadic variant of pjdlogv_exit()).
 */
void
pjdlog_exit(int exitcode, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
pjdlogv_exit(exitcode, fmt, ap);
/* NOTREACHED */
/* pjdlogv_exit() ends in exit(); va_end() only pairs up with va_start(). */
va_end(ap);
}
/*
 * Log error and exit (va_list variant).
 * Like pjdlogv_exit(), but no errno text is appended to the message.
 */
void
pjdlogv_exitx(int exitcode, const char *fmt, va_list ap)
{
pjdlogv(LOG_ERR, fmt, ap);
exit(exitcode);
}
/*
 * Log error and exit (variadic variant of pjdlogv_exitx()).
 */
void
pjdlog_exitx(int exitcode, const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
pjdlogv_exitx(exitcode, fmt, ap);
/* NOTREACHED */
/* pjdlogv_exitx() ends in exit(); va_end() only pairs up with va_start(). */
va_end(ap);
}

88
sbin/hastd/pjdlog.h Normal file
View File

@ -0,0 +1,88 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _PJDLOG_H_
#define _PJDLOG_H_
#include <sys/cdefs.h>
#include <stdarg.h>
#include <sysexits.h>
#include <syslog.h>
/* Log destinations accepted by pjdlog_mode_set(). */
#define PJDLOG_MODE_STD 0
#define PJDLOG_MODE_SYSLOG 1
/* Configuration: destination, debug verbosity and message prefix. */
void pjdlog_mode_set(int mode);
int pjdlog_mode_get(void);
void pjdlog_debug_set(int level);
int pjdlog_debug_get(void);
void pjdlog_prefix_set(const char *fmt, ...) __printflike(1, 2);
void pjdlog_prefix_setv(const char *fmt, va_list ap) __printflike(1, 0);
/*
 * Low-level entry points; 'loglevel' is a syslog priority and 'error' is
 * an errno value to describe after the message, or -1 for none.
 */
void pjdlog_common(int loglevel, int debuglevel, int error, const char *fmt,
...) __printflike(4, 5);
void pjdlogv_common(int loglevel, int debuglevel, int error, const char *fmt,
va_list ap) __printflike(4, 0);
/* Regular (non-debug) logs at an explicit priority. */
void pjdlog(int loglevel, const char *fmt, ...) __printflike(2, 3);
void pjdlogv(int loglevel, const char *fmt, va_list ap) __printflike(2, 0);
/* Shorthands for pjdlog()/pjdlogv() with a fixed priority. */
#define pjdlogv_emergency(fmt, ap) pjdlogv(LOG_EMERG, (fmt), (ap))
#define pjdlog_emergency(...) pjdlog(LOG_EMERG, __VA_ARGS__)
#define pjdlogv_alert(fmt, ap) pjdlogv(LOG_ALERT, (fmt), (ap))
#define pjdlog_alert(...) pjdlog(LOG_ALERT, __VA_ARGS__)
#define pjdlogv_critical(fmt, ap) pjdlogv(LOG_CRIT, (fmt), (ap))
#define pjdlog_critical(...) pjdlog(LOG_CRIT, __VA_ARGS__)
#define pjdlogv_error(fmt, ap) pjdlogv(LOG_ERR, (fmt), (ap))
#define pjdlog_error(...) pjdlog(LOG_ERR, __VA_ARGS__)
#define pjdlogv_warning(fmt, ap) pjdlogv(LOG_WARNING, (fmt), (ap))
#define pjdlog_warning(...) pjdlog(LOG_WARNING, __VA_ARGS__)
#define pjdlogv_notice(fmt, ap) pjdlogv(LOG_NOTICE, (fmt), (ap))
#define pjdlog_notice(...) pjdlog(LOG_NOTICE, __VA_ARGS__)
#define pjdlogv_info(fmt, ap) pjdlogv(LOG_INFO, (fmt), (ap))
#define pjdlog_info(...) pjdlog(LOG_INFO, __VA_ARGS__)
/* Debug logs; dropped when 'debuglevel' exceeds the configured level. */
void pjdlog_debug(int debuglevel, const char *fmt, ...) __printflike(2, 3);
void pjdlogv_debug(int debuglevel, const char *fmt, va_list ap) __printflike(2, 0);
/* Logs that append the description of the current errno. */
void pjdlog_errno(int loglevel, const char *fmt, ...) __printflike(2, 3);
void pjdlogv_errno(int loglevel, const char *fmt, va_list ap) __printflike(2, 0);
/* Log at LOG_ERR (with errno text for *_exit, without for *_exitx) and exit. */
void pjdlog_exit(int exitcode, const char *fmt, ...) __printflike(2, 3) __dead2;
void pjdlogv_exit(int exitcode, const char *fmt, va_list ap) __printflike(2, 0) __dead2;
void pjdlog_exitx(int exitcode, const char *fmt, ...) __printflike(2, 3) __dead2;
void pjdlogv_exitx(int exitcode, const char *fmt, va_list ap) __printflike(2, 0) __dead2;
#endif /* !_PJDLOG_H_ */

1769
sbin/hastd/primary.c Normal file

File diff suppressed because it is too large Load Diff

261
sbin/hastd/proto.c Normal file
View File

@ -0,0 +1,261 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include "proto.h"
#include "proto_impl.h"
/* Value stored in pc_magic of every live connection; checked by asserts. */
#define PROTO_CONN_MAGIC 0x907041c
/*
 * Protocol-independent connection handle. It pairs the protocol
 * implementation (pc_proto) with that implementation's private context
 * (pc_ctx) and remembers which role the connection plays.
 */
struct proto_conn {
int pc_magic;
struct hast_proto *pc_proto;
void *pc_ctx;
int pc_side;
#define PROTO_SIDE_CLIENT 0
#define PROTO_SIDE_SERVER_LISTEN 1
#define PROTO_SIDE_SERVER_WORK 2
};
/* All protocol implementations registered with proto_register(). */
static LIST_HEAD(, hast_proto) protos = LIST_HEAD_INITIALIZER(protos);
/*
 * Add a protocol implementation to the head of the list that
 * proto_client() and proto_server() search.
 */
void
proto_register(struct hast_proto *proto)
{
LIST_INSERT_HEAD(&protos, proto, hp_next);
}
static int
proto_common_setup(const char *addr, struct proto_conn **connp, int side)
{
struct hast_proto *proto;
struct proto_conn *conn;
void *ctx;
int ret;
assert(side == PROTO_SIDE_CLIENT || side == PROTO_SIDE_SERVER_LISTEN);
conn = malloc(sizeof(*conn));
if (conn == NULL)
return (-1);
LIST_FOREACH(proto, &protos, hp_next) {
if (side == PROTO_SIDE_CLIENT)
ret = proto->hp_client(addr, &ctx);
else /* if (side == PROTO_SIDE_SERVER_LISTEN) */
ret = proto->hp_server(addr, &ctx);
/*
* ret == 0 - success
* ret == -1 - addr is not for this protocol
* ret > 0 - right protocol, but an error occured
*/
if (ret >= 0)
break;
}
if (proto == NULL) {
/* Unrecognized address. */
free(conn);
errno = EINVAL;
return (-1);
}
if (ret > 0) {
/* An error occured. */
free(conn);
errno = ret;
return (-1);
}
conn->pc_proto = proto;
conn->pc_ctx = ctx;
conn->pc_side = side;
conn->pc_magic = PROTO_CONN_MAGIC;
*connp = conn;
return (0);
}
/*
 * Create a client-side connection handle for the given address.
 * Returns 0 and stores the handle in *connp, or -1 with errno set.
 * Connecting is done separately with proto_connect().
 */
int
proto_client(const char *addr, struct proto_conn **connp)
{
return (proto_common_setup(addr, connp, PROTO_SIDE_CLIENT));
}
int
proto_connect(struct proto_conn *conn)
{
int ret;
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_side == PROTO_SIDE_CLIENT);
assert(conn->pc_proto != NULL);
ret = conn->pc_proto->hp_connect(conn->pc_ctx);
if (ret != 0) {
errno = ret;
return (-1);
}
return (0);
}
/*
 * Create a listening (server-side) connection handle for the given address.
 * Returns 0 and stores the handle in *connp, or -1 with errno set.
 */
int
proto_server(const char *addr, struct proto_conn **connp)
{
return (proto_common_setup(addr, connp, PROTO_SIDE_SERVER_LISTEN));
}
int
proto_accept(struct proto_conn *conn, struct proto_conn **newconnp)
{
struct proto_conn *newconn;
int ret;
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_side == PROTO_SIDE_SERVER_LISTEN);
assert(conn->pc_proto != NULL);
newconn = malloc(sizeof(*newconn));
if (newconn == NULL)
return (-1);
ret = conn->pc_proto->hp_accept(conn->pc_ctx, &newconn->pc_ctx);
if (ret != 0) {
free(newconn);
errno = ret;
return (-1);
}
newconn->pc_proto = conn->pc_proto;
newconn->pc_side = PROTO_SIDE_SERVER_WORK;
newconn->pc_magic = PROTO_CONN_MAGIC;
*newconnp = newconn;
return (0);
}
int
proto_send(struct proto_conn *conn, const void *data, size_t size)
{
int ret;
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
ret = conn->pc_proto->hp_send(conn->pc_ctx, data, size);
if (ret != 0) {
errno = ret;
return (-1);
}
return (0);
}
int
proto_recv(struct proto_conn *conn, void *data, size_t size)
{
int ret;
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
ret = conn->pc_proto->hp_recv(conn->pc_ctx, data, size);
if (ret != 0) {
errno = ret;
return (-1);
}
return (0);
}
/*
 * Return the descriptor reported by the protocol implementation for
 * this connection.
 */
int
proto_descriptor(const struct proto_conn *conn)
{
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
return (conn->pc_proto->hp_descriptor(conn->pc_ctx));
}
/*
 * Ask the protocol implementation whether the connection matches the
 * given address string and return its answer.
 */
bool
proto_address_match(const struct proto_conn *conn, const char *addr)
{
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
return (conn->pc_proto->hp_address_match(conn->pc_ctx, addr));
}
/*
 * Have the protocol implementation write the connection's local address
 * into 'addr', a buffer of 'size' bytes.
 */
void
proto_local_address(const struct proto_conn *conn, char *addr, size_t size)
{
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
conn->pc_proto->hp_local_address(conn->pc_ctx, addr, size);
}
/*
 * Have the protocol implementation write the connection's remote address
 * into 'addr', a buffer of 'size' bytes.
 */
void
proto_remote_address(const struct proto_conn *conn, char *addr, size_t size)
{
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
conn->pc_proto->hp_remote_address(conn->pc_ctx, addr, size);
}
/*
 * Close the connection and release the handle. The handle must not be
 * used after this call.
 */
void
proto_close(struct proto_conn *conn)
{
assert(conn != NULL);
assert(conn->pc_magic == PROTO_CONN_MAGIC);
assert(conn->pc_proto != NULL);
/* Let the protocol tear down its private context first. */
conn->pc_proto->hp_close(conn->pc_ctx);
/* Clear the magic so the magic asserts can catch use of a stale handle. */
conn->pc_magic = 0;
free(conn);
}

54
sbin/hastd/proto.h Normal file
View File

@ -0,0 +1,54 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _PROTO_H_
#define _PROTO_H_
#include <stdbool.h> /* bool */
#include <stdlib.h> /* size_t */
/* Opaque connection handle; defined in proto.c. */
struct proto_conn;
/*
 * Functions returning int return 0 on success and -1 with errno set on
 * failure, except proto_descriptor(), which returns a descriptor.
 */
/* Client side: create a handle for an address, then connect it. */
int proto_client(const char *addr, struct proto_conn **connp);
int proto_connect(struct proto_conn *conn);
/* Server side: create a listening handle, then accept connections on it. */
int proto_server(const char *addr, struct proto_conn **connp);
int proto_accept(struct proto_conn *conn, struct proto_conn **newconnp);
/* Data transfer. */
int proto_send(struct proto_conn *conn, const void *data, size_t size);
int proto_recv(struct proto_conn *conn, void *data, size_t size);
/* Queries answered by the underlying protocol implementation. */
int proto_descriptor(const struct proto_conn *conn);
bool proto_address_match(const struct proto_conn *conn, const char *addr);
void proto_local_address(const struct proto_conn *conn, char *addr,
size_t size);
void proto_remote_address(const struct proto_conn *conn, char *addr,
size_t size);
/* Close the connection and free the handle. */
void proto_close(struct proto_conn *conn);
#endif /* !_PROTO_H_ */

85
sbin/hastd/proto_common.c Normal file
View File

@ -0,0 +1,85 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/socket.h>
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <strings.h>
#include "proto_impl.h"
/* Maximum size of packet we want to use when sending data. */
#ifndef MAX_SEND_SIZE
#define	MAX_SEND_SIZE	131072
#endif

/*
 * Send exactly 'size' bytes from 'data' over descriptor 'fd', in chunks of
 * at most MAX_SEND_SIZE bytes.
 *
 * Returns 0 once everything has been sent (trivially so when 'size' is 0),
 * ENOTCONN if send(2) reports that nothing could be sent, or the errno
 * value of a failed send(2). EAGAIN is retried.
 */
int
proto_common_send(int fd, const unsigned char *data, size_t size)
{
	ssize_t done;
	size_t sendsize;

	/*
	 * Test the remaining size before sending: with nothing to send,
	 * send(2) returns 0, which must not be mistaken for a lost
	 * connection.
	 */
	while (size > 0) {
		sendsize = size < MAX_SEND_SIZE ? size : MAX_SEND_SIZE;
		/* MSG_NOSIGNAL: report a broken pipe as an error, not SIGPIPE. */
		done = send(fd, data, sendsize, MSG_NOSIGNAL);
		if (done == 0)
			return (ENOTCONN);
		if (done < 0) {
			if (errno == EAGAIN)
				continue;
			return (errno);
		}
		data += done;
		size -= (size_t)done;
	}

	return (0);
}
/*
 * Receive exactly 'size' bytes from descriptor 'fd' into 'data'.
 *
 * Returns 0 once the whole buffer has been filled (trivially so when
 * 'size' is 0), ENOTCONN if the peer closed the connection before that,
 * or the errno value of a failed recv(2). EAGAIN is retried.
 */
int
proto_common_recv(int fd, unsigned char *data, size_t size)
{
	ssize_t done;

	/*
	 * Even with MSG_WAITALL, recv(2) may hand back fewer bytes than
	 * requested (e.g. when the peer disconnects or a signal arrives),
	 * so keep reading until the buffer is full instead of reporting a
	 * partially filled buffer as success.
	 */
	while (size > 0) {
		done = recv(fd, data, size, MSG_WAITALL);
		if (done == 0)
			return (ENOTCONN);
		if (done < 0) {
			if (errno == EAGAIN)
				continue;
			return (errno);
		}
		data += done;
		size -= (size_t)done;
	}

	return (0);
}

75
sbin/hastd/proto_impl.h Normal file
View File

@ -0,0 +1,75 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _PROTO_IMPL_H_
#define _PROTO_IMPL_H_
#include <sys/queue.h>
#include <stdbool.h> /* bool */
#include <stdlib.h> /* size_t */
#define __constructor __attribute__((constructor))
typedef int hp_client_t(const char *, void **);
typedef int hp_connect_t(void *);
typedef int hp_server_t(const char *, void **);
typedef int hp_accept_t(void *, void **);
typedef int hp_send_t(void *, const unsigned char *, size_t);
typedef int hp_recv_t(void *, unsigned char *, size_t);
typedef int hp_descriptor_t(const void *);
typedef bool hp_address_match_t(const void *, const char *);
typedef void hp_local_address_t(const void *, char *, size_t);
typedef void hp_remote_address_t(const void *, char *, size_t);
typedef void hp_close_t(void *);
struct hast_proto {
const char *hp_name;
hp_client_t *hp_client;
hp_connect_t *hp_connect;
hp_server_t *hp_server;
hp_accept_t *hp_accept;
hp_send_t *hp_send;
hp_recv_t *hp_recv;
hp_descriptor_t *hp_descriptor;
hp_address_match_t *hp_address_match;
hp_local_address_t *hp_local_address;
hp_remote_address_t *hp_remote_address;
hp_close_t *hp_close;
LIST_ENTRY(hast_proto) hp_next;
};
void proto_register(struct hast_proto *proto);
int proto_common_send(int fd, const unsigned char *data, size_t size);
int proto_common_recv(int fd, unsigned char *data, size_t size);
#endif /* !_PROTO_IMPL_H_ */

View File

@ -0,0 +1,272 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/socket.h>
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "hast.h"
#include "proto_impl.h"
/* Value stored in sp_magic of every live context; checked by asserts. */
#define SP_CTX_MAGIC 0x50c3741
/*
 * Private context of a socketpair connection. Both descriptors are kept
 * until the first send or receive decides which side this process is:
 * the client uses sp_fd[0], the server uses sp_fd[1].
 */
struct sp_ctx {
int sp_magic;
int sp_fd[2];
int sp_side;
#define SP_SIDE_UNDEF 0
#define SP_SIDE_CLIENT 1
#define SP_SIDE_SERVER 2
};
static void sp_close(void *ctx);
/*
 * Create a socketpair context for the address "socketpair://".
 * Returns 0 and stores the context in *ctxp on success, -1 if the address
 * is not a socketpair address, or an errno value on failure.
 */
static int
sp_client(const char *addr, void **ctxp)
{
	struct sp_ctx *spctx;
	int error;

	if (strcmp(addr, "socketpair://") != 0)
		return (-1);

	spctx = malloc(sizeof(*spctx));
	if (spctx == NULL)
		return (errno);

	if (socketpair(PF_UNIX, SOCK_STREAM, 0, spctx->sp_fd) >= 0) {
		/* The side is decided by the first send or receive. */
		spctx->sp_side = SP_SIDE_UNDEF;
		spctx->sp_magic = SP_CTX_MAGIC;
		*ctxp = spctx;
		return (0);
	}

	/* Save errno before free() gets a chance to change it. */
	error = errno;
	free(spctx);
	return (error);
}
/*
 * Not supported on socketpairs: calling this is a programming error and
 * terminates the process.
 */
static int
sp_connect(void *ctx __unused)
{
assert(!"proto_connect() not supported on socketpairs");
abort();
}
/*
 * Not supported on socketpairs: calling this is a programming error and
 * terminates the process.
 */
static int
sp_server(const char *addr __unused, void **ctxp __unused)
{
assert(!"proto_server() not supported on socketpairs");
abort();
}
/*
 * Not supported on socketpairs: calling this is a programming error and
 * terminates the process.
 */
static int
sp_accept(void *ctx __unused, void **newctxp __unused)
{

	/* Name the operation that was actually attempted. */
	assert(!"proto_accept() not supported on socketpairs");
	abort();
}
/*
 * Send 'size' bytes from 'data' over the socketpair.
 * Returns 0 on success or an errno value on failure.
 */
static int
sp_send(void *ctx, const unsigned char *data, size_t size)
{
	struct sp_ctx *spctx = ctx;
	int sock;

	assert(spctx != NULL);
	assert(spctx->sp_magic == SP_CTX_MAGIC);

	if (spctx->sp_side == SP_SIDE_UNDEF) {
		/*
		 * If the first operation done by the caller is proto_send(),
		 * we assume this is the client.
		 */
		spctx->sp_side = SP_SIDE_CLIENT;
		/* Close other end. */
		close(spctx->sp_fd[1]);
	}

	if (spctx->sp_side == SP_SIDE_CLIENT) {
		assert(spctx->sp_fd[0] >= 0);
		sock = spctx->sp_fd[0];
	} else if (spctx->sp_side == SP_SIDE_SERVER) {
		assert(spctx->sp_fd[1] >= 0);
		sock = spctx->sp_fd[1];
	} else {
		abort();
	}

	return (proto_common_send(sock, data, size));
}
/*
 * Receive 'size' bytes into 'data' from the socketpair.
 * Returns 0 on success or an errno value on failure.
 */
static int
sp_recv(void *ctx, unsigned char *data, size_t size)
{
	struct sp_ctx *spctx = ctx;
	int sock;

	assert(spctx != NULL);
	assert(spctx->sp_magic == SP_CTX_MAGIC);

	if (spctx->sp_side == SP_SIDE_UNDEF) {
		/*
		 * If the first operation done by the caller is proto_recv(),
		 * we assume this is the server.
		 */
		spctx->sp_side = SP_SIDE_SERVER;
		/* Close other end. */
		close(spctx->sp_fd[0]);
	}

	if (spctx->sp_side == SP_SIDE_SERVER) {
		assert(spctx->sp_fd[1] >= 0);
		sock = spctx->sp_fd[1];
	} else if (spctx->sp_side == SP_SIDE_CLIENT) {
		assert(spctx->sp_fd[0] >= 0);
		sock = spctx->sp_fd[0];
	} else {
		abort();
	}

	return (proto_common_recv(sock, data, size));
}
/*
 * Return the descriptor of the end used by this side of the socketpair.
 * The side must already have been decided by a send or a receive.
 */
static int
sp_descriptor(const void *ctx)
{
	const struct sp_ctx *spctx = ctx;

	assert(spctx != NULL);
	assert(spctx->sp_magic == SP_CTX_MAGIC);
	assert(spctx->sp_side == SP_SIDE_CLIENT ||
	    spctx->sp_side == SP_SIDE_SERVER);

	if (spctx->sp_side == SP_SIDE_CLIENT) {
		assert(spctx->sp_fd[0] >= 0);
		return (spctx->sp_fd[0]);
	}
	if (spctx->sp_side == SP_SIDE_SERVER) {
		assert(spctx->sp_fd[1] >= 0);
		return (spctx->sp_fd[1]);
	}

	abort();
}
/*
 * Not supported on socketpairs: calling this is a programming error and
 * terminates the process.
 */
static bool
sp_address_match(const void *ctx __unused, const char *addr __unused)
{
assert(!"proto_address_match() not supported on socketpairs");
abort();
}
/*
 * Not supported on socketpairs: calling this is a programming error and
 * terminates the process.
 */
static void
sp_local_address(const void *ctx __unused, char *addr __unused,
size_t size __unused)
{
assert(!"proto_local_address() not supported on socketpairs");
abort();
}
/*
 * Not supported on socketpairs: calling this is a programming error and
 * terminates the process.
 */
static void
sp_remote_address(const void *ctx __unused, char *addr __unused,
size_t size __unused)
{
assert(!"proto_remote_address() not supported on socketpairs");
abort();
}
/*
 * Close the descriptor(s) still held by this side and free the context.
 * If the side was never decided, both ends are still open and both are
 * closed; otherwise only this side's end is closed.
 */
static void
sp_close(void *ctx)
{
	struct sp_ctx *spctx = ctx;

	assert(spctx != NULL);
	assert(spctx->sp_magic == SP_CTX_MAGIC);

	if (spctx->sp_side == SP_SIDE_UNDEF) {
		close(spctx->sp_fd[0]);
		close(spctx->sp_fd[1]);
	} else if (spctx->sp_side == SP_SIDE_CLIENT) {
		close(spctx->sp_fd[0]);
	} else if (spctx->sp_side == SP_SIDE_SERVER) {
		close(spctx->sp_fd[1]);
	} else {
		abort();
	}

	/* Clear the magic so the magic asserts can catch a stale context. */
	spctx->sp_magic = 0;
	free(spctx);
}
/* Callback table that plugs the socketpair protocol into the proto layer. */
static struct hast_proto sp_proto = {
.hp_name = "socketpair",
.hp_client = sp_client,
.hp_connect = sp_connect,
.hp_server = sp_server,
.hp_accept = sp_accept,
.hp_send = sp_send,
.hp_recv = sp_recv,
.hp_descriptor = sp_descriptor,
.hp_address_match = sp_address_match,
.hp_local_address = sp_local_address,
.hp_remote_address = sp_remote_address,
.hp_close = sp_close
};
/* Constructor function that registers the socketpair protocol. */
static __constructor void
sp_ctor(void)
{
proto_register(&sp_proto);
}

447
sbin/hastd/proto_tcp4.c Normal file
View File

@ -0,0 +1,447 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h> /* MAXHOSTNAMELEN */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <assert.h>
#include <errno.h>
#include <netdb.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "hast.h"
#include "pjdlog.h"
#include "proto_impl.h"
/* Value stored in tc_magic of every live context; checked by asserts. */
#define TCP4_CTX_MAGIC 0x7c441c
/*
 * Private context of a TCP/IPv4 connection: the address parsed from the
 * address string (tc_sin), the socket (tc_fd) and the role of the socket.
 */
struct tcp4_ctx {
int tc_magic;
struct sockaddr_in tc_sin;
int tc_fd;
int tc_side;
#define TCP4_SIDE_CLIENT 0
#define TCP4_SIDE_SERVER_LISTEN 1
#define TCP4_SIDE_SERVER_WORK 2
};
static void tcp4_close(void *ctx);
/*
 * Convert a dotted-quad IPv4 address or a host name to an in_addr_t.
 * Returns INADDR_NONE if the string is neither.
 */
static in_addr_t
str2ip(const char *str)
{
	struct hostent *hp;
	in_addr_t ip;

	/* A valid IP address needs no lookup. */
	ip = inet_addr(str);
	if (ip == INADDR_NONE) {
		/* Check if it is a valid host name. */
		hp = gethostbyname(str);
		if (hp != NULL)
			ip = ((struct in_addr *)(void *)hp->h_addr)->s_addr;
	}

	return (ip);
}
/*
 * Convert the given string to a non-negative decimal number.
 *
 * The string must consist of decimal digits only (no sign, no whitespace)
 * and the value must lie within [minnum, maxnum]. On success the number is
 * stored in *nump and 0 is returned; otherwise -1 is returned with errno
 * set to EINVAL and *nump is left untouched.
 */
static int
numfromstr(const char *str, intmax_t minnum, intmax_t maxnum, intmax_t *nump)
{
	intmax_t digit, num;

	if (str[0] == '\0')
		goto invalid;	/* Empty string. */
	num = 0;
	for (; *str != '\0'; str++) {
		if (*str < '0' || *str > '9')
			goto invalid;	/* Non-digit character. */
		digit = *str - '0';
		/*
		 * Detect overflow before it happens: signed overflow is
		 * undefined behaviour, so it cannot be tested for after
		 * the multiplication.
		 */
		if (num > (INTMAX_MAX - digit) / 10)
			goto invalid;	/* Overflow. */
		num = num * 10 + digit;
		if (num > maxnum)
			goto invalid;	/* Too big. */
	}
	if (num < minnum)
		goto invalid;	/* Too small. */
	*nump = num;
	return (0);
invalid:
	errno = EINVAL;
	return (-1);
}
/*
 * Parse an address string of the form "[tcp4://|tcp://]host[:port]" into
 * *sinp. 'host' may be an IPv4 address or a host name; when the port is
 * omitted, HASTD_PORT is used.
 *
 * Returns 0 on success, -1 if the string is not a TCP/IPv4 address (NULL,
 * a path, or a different "scheme://" prefix), or a positive errno value
 * if it is a TCP/IPv4 address that cannot be parsed.
 */
static int
tcp4_addr(const char *addr, struct sockaddr_in *sinp)
{
	char iporhost[MAXHOSTNAMELEN];
	const char *pp;
	size_t size;
	in_addr_t ip;

	if (addr == NULL)
		return (-1);

	if (strncasecmp(addr, "tcp4://", 7) == 0)
		addr += 7;
	else if (strncasecmp(addr, "tcp://", 6) == 0)
		addr += 6;
	else if (addr[0] != '/' &&	/* If this is not path... */
	    strstr(addr, "://") == NULL)/* ...and has no prefix... */
		;			/* ...tcp4 is the default. */
	else
		return (-1);

	/*
	 * Start from a zeroed structure so that no uninitialized bytes
	 * (e.g. the sin_zero padding) reach bind(2) or connect(2).
	 */
	memset(sinp, 0, sizeof(*sinp));
	sinp->sin_family = AF_INET;
	sinp->sin_len = sizeof(*sinp);

	/* Extract optional port. */
	pp = strrchr(addr, ':');
	if (pp == NULL) {
		/* Port not given, use the default. */
		sinp->sin_port = htons(HASTD_PORT);
	} else {
		intmax_t port;

		if (numfromstr(pp + 1, 1, 65535, &port) < 0)
			return (errno);
		sinp->sin_port = htons(port);
	}

	/* Extract host name or IP address. */
	if (pp == NULL) {
		size = sizeof(iporhost);
		if (strlcpy(iporhost, addr, size) >= size)
			return (ENAMETOOLONG);
	} else {
		/* Length of the host part plus room for the terminator. */
		size = (size_t)(pp - addr + 1);
		if (size > sizeof(iporhost))
			return (ENAMETOOLONG);
		strlcpy(iporhost, addr, size);
	}

	/* Convert string (IP address or host name) to in_addr_t. */
	ip = str2ip(iporhost);
	if (ip == INADDR_NONE)
		return (EINVAL);
	sinp->sin_addr.s_addr = ip;

	return (0);
}
/*
 * Shared setup for client and listening sockets: parse the address,
 * create the socket and apply the socket options.
 *
 * Returns 0 and stores the new context in *ctxp on success, -1 if the
 * address is not a TCP/IPv4 address, or a positive errno value on error.
 * Failure to set a socket option is only logged as a warning.
 */
static int
tcp4_common_setup(const char *addr, void **ctxp, int side)
{
	struct tcp4_ctx *tctx;
	int ret, val;

	tctx = malloc(sizeof(*tctx));
	if (tctx == NULL)
		return (errno);

	/* Parse given address. */
	if ((ret = tcp4_addr(addr, &tctx->tc_sin)) != 0) {
		free(tctx);
		return (ret);
	}

	tctx->tc_fd = socket(AF_INET, SOCK_STREAM, 0);
	if (tctx->tc_fd == -1) {
		ret = errno;
		free(tctx);
		return (ret);
	}

	/* Socket settings. */
	val = 1;
	if (setsockopt(tctx->tc_fd, IPPROTO_TCP, TCP_NODELAY, &val,
	    sizeof(val)) == -1) {
		pjdlog_warning("Unable to set TCP_NODELAY on %s", addr);
	}
	val = 131072;
	if (setsockopt(tctx->tc_fd, SOL_SOCKET, SO_SNDBUF, &val,
	    sizeof(val)) == -1) {
		pjdlog_warning("Unable to set send buffer size on %s", addr);
	}
	val = 131072;
	if (setsockopt(tctx->tc_fd, SOL_SOCKET, SO_RCVBUF, &val,
	    sizeof(val)) == -1) {
		pjdlog_warning("Unable to set receive buffer size on %s", addr);
	}

	tctx->tc_side = side;
	tctx->tc_magic = TCP4_CTX_MAGIC;
	*ctxp = tctx;

	return (0);
}
static int
tcp4_client(const char *addr, void **ctxp)
{
return (tcp4_common_setup(addr, ctxp, TCP4_SIDE_CLIENT));
}
/*
 * Connect a client context to the address it was created for.
 * Returns 0 on success or errno from connect(2).
 */
static int
tcp4_connect(void *ctx)
{
	struct tcp4_ctx *client = ctx;
	struct sockaddr *sa;

	assert(client != NULL);
	assert(client->tc_magic == TCP4_CTX_MAGIC);
	assert(client->tc_side == TCP4_SIDE_CLIENT);
	assert(client->tc_fd >= 0);

	sa = (struct sockaddr *)&client->tc_sin;
	return (connect(client->tc_fd, sa, sizeof(client->tc_sin)) < 0 ?
	    errno : 0);
}
static int
tcp4_server(const char *addr, void **ctxp)
{
struct tcp4_ctx *tctx;
int ret, val;
ret = tcp4_common_setup(addr, ctxp, TCP4_SIDE_SERVER_LISTEN);
if (ret != 0)
return (ret);
tctx = *ctxp;
val = 1;
/* Ignore failure. */
(void)setsockopt(tctx->tc_fd, SOL_SOCKET, SO_REUSEADDR, &val,
sizeof(val));
if (bind(tctx->tc_fd, (struct sockaddr *)&tctx->tc_sin,
sizeof(tctx->tc_sin)) < 0) {
ret = errno;
tcp4_close(tctx);
return (ret);
}
if (listen(tctx->tc_fd, 8) < 0) {
ret = errno;
tcp4_close(tctx);
return (ret);
}
return (0);
}
/*
 * Accept a connection on a listening context and return a new context for
 * it in *newctxp. Returns 0 on success or an errno value.
 */
static int
tcp4_accept(void *ctx, void **newctxp)
{
	struct tcp4_ctx *tctx = ctx;
	struct tcp4_ctx *newtctx;
	socklen_t fromlen;
	int ret;

	assert(tctx != NULL);
	assert(tctx->tc_magic == TCP4_CTX_MAGIC);
	assert(tctx->tc_side == TCP4_SIDE_SERVER_LISTEN);
	assert(tctx->tc_fd >= 0);

	newtctx = malloc(sizeof(*newtctx));
	if (newtctx == NULL)
		return (errno);

	/*
	 * Peer's address belongs to the new context. The listening context
	 * has to keep the address it is bound to, so never pass its tc_sin
	 * to accept(2).
	 */
	fromlen = sizeof(newtctx->tc_sin);
	newtctx->tc_fd = accept(tctx->tc_fd,
	    (struct sockaddr *)&newtctx->tc_sin, &fromlen);
	if (newtctx->tc_fd < 0) {
		ret = errno;
		free(newtctx);
		return (ret);
	}

	newtctx->tc_side = TCP4_SIDE_SERVER_WORK;
	newtctx->tc_magic = TCP4_CTX_MAGIC;
	*newctxp = newtctx;

	return (0);
}
/*
 * Send size bytes of data over the context's socket using the common
 * send routine; its return value is passed through.
 */
static int
tcp4_send(void *ctx, const unsigned char *data, size_t size)
{
	struct tcp4_ctx *conn = ctx;
	int fd;

	assert(conn != NULL);
	assert(conn->tc_magic == TCP4_CTX_MAGIC);
	assert(conn->tc_fd >= 0);

	fd = conn->tc_fd;
	return (proto_common_send(fd, data, size));
}
/*
 * Receive size bytes of data from the context's socket using the common
 * receive routine; its return value is passed through.
 */
static int
tcp4_recv(void *ctx, unsigned char *data, size_t size)
{
	struct tcp4_ctx *conn = ctx;
	int fd;

	assert(conn != NULL);
	assert(conn->tc_magic == TCP4_CTX_MAGIC);
	assert(conn->tc_fd >= 0);

	fd = conn->tc_fd;
	return (proto_common_recv(fd, data, size));
}
/*
 * Return the socket descriptor kept in the context.
 */
static int
tcp4_descriptor(const void *ctx)
{
	const struct tcp4_ctx *conn = ctx;
	int fd;

	assert(conn != NULL);
	assert(conn->tc_magic == TCP4_CTX_MAGIC);

	fd = conn->tc_fd;
	return (fd);
}
/*
 * Format an IPv4 socket address as "tcp4://a.b.c.d:port" into addr, which
 * is size bytes long. Output that does not fit is truncated by snprintf(3).
 */
static void
sin2str(struct sockaddr_in *sinp, char *addr, size_t size)
{
	in_addr_t hostip;
	unsigned int octet[4], port, i;

	assert(addr != NULL);
	assert(sinp->sin_family == AF_INET);

	/* Work in host byte order, most significant octet first. */
	hostip = ntohl(sinp->sin_addr.s_addr);
	for (i = 0; i < 4; i++)
		octet[i] = (hostip >> (24 - 8 * i)) & 0xff;
	port = ntohs(sinp->sin_port);

	snprintf(addr, size, "tcp4://%u.%u.%u.%u:%u", octet[0], octet[1],
	    octet[2], octet[3], port);
}
/*
 * Check if the peer connected to the context's socket has the IP address
 * given in addr. Only IP addresses are compared, ports are ignored.
 * Returns false also when addr cannot be parsed or the peer is unknown.
 */
static bool
tcp4_address_match(const void *ctx, const char *addr)
{
	const struct tcp4_ctx *conn = ctx;
	struct sockaddr_in expected, peer;
	socklen_t peerlen;

	assert(conn != NULL);
	assert(conn->tc_magic == TCP4_CTX_MAGIC);

	if (tcp4_addr(addr, &expected) != 0)
		return (false);

	peerlen = sizeof(peer);
	if (getpeername(conn->tc_fd, (struct sockaddr *)&peer, &peerlen) < 0)
		return (false);

	return (expected.sin_addr.s_addr == peer.sin_addr.s_addr);
}
/*
 * Store textual form of the socket's local address in addr (size bytes),
 * or "N/A" if the address cannot be obtained.
 */
static void
tcp4_local_address(const void *ctx, char *addr, size_t size)
{
	const struct tcp4_ctx *conn = ctx;
	struct sockaddr_in local;
	socklen_t locallen;

	assert(conn != NULL);
	assert(conn->tc_magic == TCP4_CTX_MAGIC);

	locallen = sizeof(local);
	if (getsockname(conn->tc_fd, (struct sockaddr *)&local,
	    &locallen) >= 0) {
		sin2str(&local, addr, size);
	} else {
		strlcpy(addr, "N/A", size);
	}
}
/*
 * Store textual form of the peer's address in addr (size bytes), or "N/A"
 * if the address cannot be obtained.
 */
static void
tcp4_remote_address(const void *ctx, char *addr, size_t size)
{
	const struct tcp4_ctx *conn = ctx;
	struct sockaddr_in peer;
	socklen_t peerlen;

	assert(conn != NULL);
	assert(conn->tc_magic == TCP4_CTX_MAGIC);

	peerlen = sizeof(peer);
	if (getpeername(conn->tc_fd, (struct sockaddr *)&peer,
	    &peerlen) >= 0) {
		sin2str(&peer, addr, size);
	} else {
		strlcpy(addr, "N/A", size);
	}
}
/*
 * Close the socket (if there is one) and release the context.
 */
static void
tcp4_close(void *ctx)
{
	struct tcp4_ctx *dying = ctx;

	assert(dying != NULL);
	assert(dying->tc_magic == TCP4_CTX_MAGIC);

	if (dying->tc_fd >= 0) {
		close(dying->tc_fd);
	}
	/* Wipe the magic, so use of a stale context trips the assertions. */
	dying->tc_magic = 0;
	free(dying);
}
/*
 * Method table of the "tcp4" protocol; handed to proto_register() by
 * tcp4_ctor().
 */
static struct hast_proto tcp4_proto = {
.hp_name = "tcp4",
.hp_client = tcp4_client,
.hp_connect = tcp4_connect,
.hp_server = tcp4_server,
.hp_accept = tcp4_accept,
.hp_send = tcp4_send,
.hp_recv = tcp4_recv,
.hp_descriptor = tcp4_descriptor,
.hp_address_match = tcp4_address_match,
.hp_local_address = tcp4_local_address,
.hp_remote_address = tcp4_remote_address,
.hp_close = tcp4_close
};
/*
 * Register the tcp4 protocol. The function is marked __constructor, so it
 * is presumably run automatically before main() - confirm against the
 * macro's definition.
 */
static __constructor void
tcp4_ctor(void)
{
proto_register(&tcp4_proto);
}

330
sbin/hastd/proto_uds.c Normal file
View File

@ -0,0 +1,330 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/* UDS - UNIX Domain Socket */
#include <sys/un.h>
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include "hast.h"
#include "proto_impl.h"
/* Value kept in uc_magic of every valid context. */
#define UDS_CTX_MAGIC 0xd541c
/*
 * State of a single UNIX domain socket endpoint.
 */
struct uds_ctx {
int uc_magic; /* UDS_CTX_MAGIC; zeroed by uds_close(). */
struct sockaddr_un uc_sun; /* Socket address parsed by uds_addr(). */
int uc_fd; /* Socket descriptor. */
int uc_side; /* Role of this endpoint, one of: */
#define UDS_SIDE_CLIENT 0
#define UDS_SIDE_SERVER_LISTEN 1
#define UDS_SIDE_SERVER_WORK 2
};
static void uds_close(void *ctx);
/*
 * Parse "uds://path", "unix://path" or a bare absolute path into *sunp.
 *
 * Returns 0 on success, -1 when the address is NULL or does not look like
 * a UNIX domain socket address, and ENAMETOOLONG when the path does not
 * fit into sun_path.
 */
static int
uds_addr(const char *addr, struct sockaddr_un *sunp)
{
	const char *path;
	size_t pathlen;

	if (addr == NULL)
		return (-1);

	if (strncasecmp(addr, "uds://", 6) == 0) {
		path = addr + 6;
	} else if (strncasecmp(addr, "unix://", 7) == 0) {
		path = addr + 7;
	} else if (addr[0] == '/' && strstr(addr, "://") == NULL) {
		/* Absolute path without any prefix is assumed to be ours. */
		path = addr;
	} else {
		return (-1);
	}

	sunp->sun_family = AF_UNIX;
	pathlen = strlcpy(sunp->sun_path, path, sizeof(sunp->sun_path));
	if (pathlen >= sizeof(sunp->sun_path))
		return (ENAMETOOLONG);
	sunp->sun_len = SUN_LEN(sunp);

	return (0);
}
static int
uds_common_setup(const char *addr, void **ctxp, int side)
{
struct uds_ctx *uctx;
int ret;
uctx = malloc(sizeof(*uctx));
if (uctx == NULL)
return (errno);
/* Parse given address. */
if ((ret = uds_addr(addr, &uctx->uc_sun)) != 0) {
free(uctx);
return (ret);
}
uctx->uc_fd = socket(AF_UNIX, SOCK_STREAM, 0);
if (uctx->uc_fd == -1) {
ret = errno;
free(uctx);
return (ret);
}
uctx->uc_side = side;
uctx->uc_magic = UDS_CTX_MAGIC;
*ctxp = uctx;
return (0);
}
static int
uds_client(const char *addr, void **ctxp)
{
return (uds_common_setup(addr, ctxp, UDS_SIDE_CLIENT));
}
/*
 * Connect a client context to the socket it was created for.
 * Returns 0 on success or errno from connect(2).
 */
static int
uds_connect(void *ctx)
{
	struct uds_ctx *client = ctx;
	struct sockaddr *sa;

	assert(client != NULL);
	assert(client->uc_magic == UDS_CTX_MAGIC);
	assert(client->uc_side == UDS_SIDE_CLIENT);
	assert(client->uc_fd >= 0);

	sa = (struct sockaddr *)&client->uc_sun;
	return (connect(client->uc_fd, sa, sizeof(client->uc_sun)) < 0 ?
	    errno : 0);
}
static int
uds_server(const char *addr, void **ctxp)
{
struct uds_ctx *uctx;
int ret;
ret = uds_common_setup(addr, ctxp, UDS_SIDE_SERVER_LISTEN);
if (ret != 0)
return (ret);
uctx = *ctxp;
unlink(uctx->uc_sun.sun_path);
if (bind(uctx->uc_fd, (struct sockaddr *)&uctx->uc_sun,
sizeof(uctx->uc_sun)) < 0) {
ret = errno;
uds_close(uctx);
return (ret);
}
if (listen(uctx->uc_fd, 8) < 0) {
ret = errno;
uds_close(uctx);
return (ret);
}
return (0);
}
/*
 * Accept a connection on a listening context and return a new context for
 * it in *newctxp. Returns 0 on success or an errno value.
 */
static int
uds_accept(void *ctx, void **newctxp)
{
	struct uds_ctx *uctx = ctx;
	struct uds_ctx *newuctx;
	socklen_t fromlen;
	int ret;

	assert(uctx != NULL);
	assert(uctx->uc_magic == UDS_CTX_MAGIC);
	assert(uctx->uc_side == UDS_SIDE_SERVER_LISTEN);
	assert(uctx->uc_fd >= 0);

	newuctx = malloc(sizeof(*newuctx));
	if (newuctx == NULL)
		return (errno);

	/*
	 * Peer's address belongs to the new context. The listening context
	 * has to keep the path it is bound to, as uds_close() unlinks it.
	 * Clear the address first, because accept(2) may fill only part of
	 * it (eg. when the peer is not bound to any path).
	 */
	memset(&newuctx->uc_sun, 0, sizeof(newuctx->uc_sun));
	fromlen = sizeof(newuctx->uc_sun);
	newuctx->uc_fd = accept(uctx->uc_fd,
	    (struct sockaddr *)&newuctx->uc_sun, &fromlen);
	if (newuctx->uc_fd < 0) {
		ret = errno;
		free(newuctx);
		return (ret);
	}

	newuctx->uc_side = UDS_SIDE_SERVER_WORK;
	newuctx->uc_magic = UDS_CTX_MAGIC;
	*newctxp = newuctx;

	return (0);
}
/*
 * Send size bytes of data over the context's socket using the common
 * send routine; its return value is passed through.
 */
static int
uds_send(void *ctx, const unsigned char *data, size_t size)
{
	struct uds_ctx *conn = ctx;
	int fd;

	assert(conn != NULL);
	assert(conn->uc_magic == UDS_CTX_MAGIC);
	assert(conn->uc_fd >= 0);

	fd = conn->uc_fd;
	return (proto_common_send(fd, data, size));
}
/*
 * Receive size bytes of data from the context's socket using the common
 * receive routine; its return value is passed through.
 */
static int
uds_recv(void *ctx, unsigned char *data, size_t size)
{
	struct uds_ctx *conn = ctx;
	int fd;

	assert(conn != NULL);
	assert(conn->uc_magic == UDS_CTX_MAGIC);
	assert(conn->uc_fd >= 0);

	fd = conn->uc_fd;
	return (proto_common_recv(fd, data, size));
}
/*
 * Return the socket descriptor kept in the context.
 */
static int
uds_descriptor(const void *ctx)
{
	const struct uds_ctx *conn = ctx;
	int fd;

	assert(conn != NULL);
	assert(conn->uc_magic == UDS_CTX_MAGIC);

	fd = conn->uc_fd;
	return (fd);
}
/*
 * Address matching is not implemented for UNIX domain sockets. Calling
 * this method is a programming error: the assertion fails or, when
 * assertions are compiled out, abort() terminates the process.
 */
static bool
uds_address_match(const void *ctx __unused, const char *addr __unused)
{
assert(!"proto_address_match() not supported on UNIX domain sockets");
abort();
}
/*
 * Store "uds://<path>" of the socket's local address in addr (size bytes),
 * or "N/A" if the address cannot be obtained or has no path.
 */
static void
uds_local_address(const void *ctx, char *addr, size_t size)
{
	const struct uds_ctx *conn = ctx;
	struct sockaddr_un local;
	socklen_t locallen;

	assert(conn != NULL);
	assert(conn->uc_magic == UDS_CTX_MAGIC);
	assert(addr != NULL);

	locallen = sizeof(local);
	if (getsockname(conn->uc_fd, (struct sockaddr *)&local,
	    &locallen) < 0) {
		strlcpy(addr, "N/A", size);
		return;
	}
	assert(local.sun_family == AF_UNIX);

	if (local.sun_path[0] != '\0')
		snprintf(addr, size, "uds://%s", local.sun_path);
	else
		strlcpy(addr, "N/A", size);
}
/*
 * Store "uds://<path>" of the peer's address in addr (size bytes), or
 * "N/A" if the address cannot be obtained or has no path.
 */
static void
uds_remote_address(const void *ctx, char *addr, size_t size)
{
	const struct uds_ctx *conn = ctx;
	struct sockaddr_un peer;
	socklen_t peerlen;

	assert(conn != NULL);
	assert(conn->uc_magic == UDS_CTX_MAGIC);
	assert(addr != NULL);

	peerlen = sizeof(peer);
	if (getpeername(conn->uc_fd, (struct sockaddr *)&peer,
	    &peerlen) < 0) {
		strlcpy(addr, "N/A", size);
		return;
	}
	assert(peer.sun_family == AF_UNIX);

	if (peer.sun_path[0] != '\0')
		snprintf(addr, size, "uds://%s", peer.sun_path);
	else
		strlcpy(addr, "N/A", size);
}
static void
uds_close(void *ctx)
{
struct uds_ctx *uctx = ctx;
assert(uctx != NULL);
assert(uctx->uc_magic == UDS_CTX_MAGIC);
if (uctx->uc_fd >= 0)
close(uctx->uc_fd);
unlink(uctx->uc_sun.sun_path);
uctx->uc_magic = 0;
free(uctx);
}
/*
 * Method table of the "uds" protocol; handed to proto_register() by
 * uds_ctor().
 */
static struct hast_proto uds_proto = {
.hp_name = "uds",
.hp_client = uds_client,
.hp_connect = uds_connect,
.hp_server = uds_server,
.hp_accept = uds_accept,
.hp_send = uds_send,
.hp_recv = uds_recv,
.hp_descriptor = uds_descriptor,
.hp_address_match = uds_address_match,
.hp_local_address = uds_local_address,
.hp_remote_address = uds_remote_address,
.hp_close = uds_close
};
/*
 * Register the uds protocol. The function is marked __constructor, so it
 * is presumably run automatically before main() - confirm against the
 * macro's definition.
 */
static __constructor void
uds_ctor(void)
{
proto_register(&uds_proto);
}

137
sbin/hastd/rangelock.c Normal file
View File

@ -0,0 +1,137 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/queue.h>
#include <assert.h>
#include <stdbool.h>
#include <stdlib.h>
#include <unistd.h>
#include "rangelock.h"
/* Value kept in rls_magic of every initialized set of range locks. */
#define RANGELOCKS_MAGIC 0x94310c
/*
 * Set of locked ranges, kept as an unsorted list.
 */
struct rangelocks {
int rls_magic; /* Magic value. */
TAILQ_HEAD(, rlock) rls_locks; /* List of locked ranges. */
};
/*
 * Single locked range.
 */
struct rlock {
off_t rl_start; /* Offset of the first locked byte. */
off_t rl_end; /* rl_start + length, ie. first byte after the range. */
TAILQ_ENTRY(rlock) rl_next; /* Linkage on rls_locks. */
};
/*
 * Allocate an empty set of range locks and store it in *rlsp.
 * Returns 0 on success and -1 if memory cannot be allocated.
 */
int
rangelock_init(struct rangelocks **rlsp)
{
	struct rangelocks *newrls;

	assert(rlsp != NULL);

	if ((newrls = malloc(sizeof(*newrls))) == NULL)
		return (-1);
	newrls->rls_magic = RANGELOCKS_MAGIC;
	TAILQ_INIT(&newrls->rls_locks);

	*rlsp = newrls;
	return (0);
}
/*
 * Release the set together with all ranges that are still locked.
 */
void
rangelock_free(struct rangelocks *rls)
{
	struct rlock *lock;

	assert(rls->rls_magic == RANGELOCKS_MAGIC);

	/* Wipe the magic, so use of a stale set trips the assertions. */
	rls->rls_magic = 0;
	while (!TAILQ_EMPTY(&rls->rls_locks)) {
		lock = TAILQ_FIRST(&rls->rls_locks);
		TAILQ_REMOVE(&rls->rls_locks, lock, rl_next);
		free(lock);
	}
	free(rls);
}
/*
 * Lock the range of length bytes starting at offset. No check is made if
 * the range overlaps with any range already locked.
 * Returns 0 on success and -1 if memory cannot be allocated.
 */
int
rangelock_add(struct rangelocks *rls, off_t offset, off_t length)
{
	struct rlock *lock;

	assert(rls->rls_magic == RANGELOCKS_MAGIC);

	if ((lock = malloc(sizeof(*lock))) == NULL)
		return (-1);
	lock->rl_end = offset + length;
	lock->rl_start = offset;
	TAILQ_INSERT_TAIL(&rls->rls_locks, lock, rl_next);

	return (0);
}
/*
 * Unlock the range locked earlier with exactly the same offset and length.
 * It is a programming error to unlock a range that is not locked.
 */
void
rangelock_del(struct rangelocks *rls, off_t offset, off_t length)
{
	struct rlock *lock;

	assert(rls->rls_magic == RANGELOCKS_MAGIC);

	for (lock = TAILQ_FIRST(&rls->rls_locks); lock != NULL;
	    lock = TAILQ_NEXT(lock, rl_next)) {
		if (lock->rl_start != offset)
			continue;
		if (lock->rl_end == offset + length)
			break;
	}
	assert(lock != NULL);

	TAILQ_REMOVE(&rls->rls_locks, lock, rl_next);
	free(lock);
}
/*
 * Check if the range of length bytes starting at offset overlaps with any
 * of the locked ranges.
 */
bool
rangelock_islocked(struct rangelocks *rls, off_t offset, off_t length)
{
	struct rlock *lock;

	assert(rls->rls_magic == RANGELOCKS_MAGIC);

	TAILQ_FOREACH(lock, &rls->rls_locks, rl_next) {
		/* Locked range starts inside of the given range. */
		if (lock->rl_start >= offset &&
		    lock->rl_start < offset + length) {
			return (true);
		}
		/* Locked range ends inside of the given range. */
		if (lock->rl_end > offset && lock->rl_end <= offset + length)
			return (true);
		/* Locked range covers the given range entirely. */
		if (lock->rl_start < offset && lock->rl_end > offset + length)
			return (true);
	}
	return (false);
}

46
sbin/hastd/rangelock.h Normal file
View File

@ -0,0 +1,46 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _RANGELOCK_H_
#define _RANGELOCK_H_
#include <stdbool.h>
#include <unistd.h>
/* Opaque set of locked ranges; defined in rangelock.c. */
struct rangelocks;
/* Allocate an empty set; returns 0 on success, -1 on allocation failure. */
int rangelock_init(struct rangelocks **rlsp);
/* Release the set and all ranges still locked in it. */
void rangelock_free(struct rangelocks *rls);
/* Lock [offset, offset + length); returns 0 on success, -1 on failure. */
int rangelock_add(struct rangelocks *rls, off_t offset, off_t length);
/* Unlock a range locked with exactly the same offset and length. */
void rangelock_del(struct rangelocks *rls, off_t offset, off_t length);
/* Check if the given range overlaps with any locked range. */
bool rangelock_islocked(struct rangelocks *rls, off_t offset, off_t length);
#endif /* !_RANGELOCK_H_ */

697
sbin/hastd/secondary.c Normal file
View File

@ -0,0 +1,697 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/time.h>
#include <sys/bio.h>
#include <sys/disk.h>
#include <sys/stat.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <libgeom.h>
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <activemap.h>
#include <nv.h>
#include <pjdlog.h>
#include "control.h"
#include "hast.h"
#include "hast_proto.h"
#include "hastd.h"
#include "metadata.h"
#include "proto.h"
#include "subr.h"
#include "synch.h"
/*
 * Single I/O request received from the primary node. Requests are
 * preallocated by init_environment() and move between the lists below.
 */
struct hio {
uint64_t hio_seq; /* NOTE(review): presumably request sequence number; not used in the code visible here. */
int hio_error; /* 0 or errno-style error code (see requnpack()). */
struct nv *hio_nv; /* Request header received from the primary. */
void *hio_data; /* Data buffer of MAXPHYS bytes. */
uint8_t hio_cmd; /* HIO_* command taken from the header. */
uint64_t hio_offset; /* Offset taken from the header. */
uint64_t hio_length; /* Length taken from the header. */
TAILQ_ENTRY(hio) hio_next; /* Linkage on one of the lists below. */
};
/*
 * Free list holds unused structures. When free list is empty, we have to wait
 * until some in-progress requests are freed.
 */
static TAILQ_HEAD(, hio) hio_free_list;
static pthread_mutex_t hio_free_list_lock;
static pthread_cond_t hio_free_list_cond;
/*
 * Disk thread (the one that does I/O requests) takes requests from this list.
 */
static TAILQ_HEAD(, hio) hio_disk_list;
static pthread_mutex_t hio_disk_list_lock;
static pthread_cond_t hio_disk_list_cond;
/*
 * Requests waiting to be answered are kept on this list. The recv thread
 * puts requests that failed validation here directly; the list is
 * presumably drained by the send thread, which sends replies back to the
 * primary node.
 */
static TAILQ_HEAD(, hio) hio_send_list;
static pthread_mutex_t hio_send_list_lock;
static pthread_cond_t hio_send_list_cond;
/*
 * Maximum number of outstanding I/O requests.
 */
#define HAST_HIO_MAX 256
static void *recv_thread(void *arg);
static void *disk_thread(void *arg);
static void *send_thread(void *arg);
/*
 * Prepare request lists with their synchronization primitives and fill
 * the free list with HAST_HIO_MAX preallocated requests. The process
 * exits if memory cannot be allocated.
 */
static void
init_environment(void)
{
	struct hio *req;
	unsigned int left;

	/*
	 * Initialize lists, their locks and their condition variables.
	 */
	TAILQ_INIT(&hio_free_list);
	mtx_init(&hio_free_list_lock);
	cv_init(&hio_free_list_cond);

	TAILQ_INIT(&hio_disk_list);
	mtx_init(&hio_disk_list_lock);
	cv_init(&hio_disk_list_cond);

	TAILQ_INIT(&hio_send_list);
	mtx_init(&hio_send_list_lock);
	cv_init(&hio_send_list_cond);

	/*
	 * Allocate requests pool and initialize requests.
	 */
	for (left = HAST_HIO_MAX; left > 0; left--) {
		req = malloc(sizeof(*req));
		if (req == NULL) {
			errx(EX_TEMPFAIL, "cannot allocate %zu bytes of memory "
			    "for hio request", sizeof(*req));
		}
		req->hio_error = 0;
		/* Every request owns a buffer for the largest possible I/O. */
		req->hio_data = malloc(MAXPHYS);
		if (req->hio_data == NULL) {
			errx(EX_TEMPFAIL, "cannot allocate %zu bytes of memory "
			    "for gctl_data", (size_t)MAXPHYS);
		}
		TAILQ_INSERT_HEAD(&hio_free_list, req, hio_next);
	}
}
/*
 * Read metadata of the local component; exit if it cannot be read.
 */
static void
init_local(struct hast_resource *res)
{
	int error;

	error = metadata_read(res, true);
	if (error < 0)
		exit(EX_NOINPUT);
}
/*
 * Handshake with the primary node: compare counters from both nodes,
 * decide on synchronization source and send our parameters together with
 * the activemap (bitmap of extents to synchronize) back to the primary.
 *
 * 'nvin' is the header received from the primary; "resuid", "localcnt" and
 * "remotecnt" are read from it. The process exits on I/O errors and when
 * split-brain is detected.
 */
static void
init_remote(struct hast_resource *res, struct nv *nvin)
{
	uint64_t resuid;
	struct nv *nvout;
	unsigned char *map;
	size_t mapsize;

	map = NULL;
	mapsize = 0;
	nvout = nv_alloc();
	nv_add_int64(nvout, (int64_t)res->hr_datasize, "datasize");
	nv_add_int32(nvout, (int32_t)res->hr_extentsize, "extentsize");
	resuid = nv_get_uint64(nvin, "resuid");
	res->hr_primary_localcnt = nv_get_uint64(nvin, "localcnt");
	res->hr_primary_remotecnt = nv_get_uint64(nvin, "remotecnt");
	nv_add_uint64(nvout, res->hr_secondary_localcnt, "localcnt");
	nv_add_uint64(nvout, res->hr_secondary_remotecnt, "remotecnt");
	mapsize = activemap_calc_ondisk_size(res->hr_local_mediasize -
	    METADATA_SIZE, res->hr_extentsize, res->hr_local_sectorsize);
	map = malloc(mapsize);
	if (map == NULL) {
		pjdlog_exitx(EX_TEMPFAIL,
		    "Unable to allocate memory (%zu bytes) for activemap.",
		    mapsize);
	}
	nv_add_uint32(nvout, (uint32_t)mapsize, "mapsize");
	/*
	 * When we work as primary and secondary is missing we will increase
	 * localcnt in our metadata. When secondary is connected and synced
	 * we make localcnt be equal to remotecnt, which means nodes are more
	 * or less in sync.
	 * Split-brain condition is when both nodes are not able to communicate
	 * and are both configured as primary nodes. In turn, they can both
	 * make incompatible changes to the data and we have to detect that.
	 * Under split-brain condition we will increase our localcnt on first
	 * write and remote node will increase its localcnt on first write.
	 * When we connect we can see that primary's localcnt is greater than
	 * our remotecnt (primary was modified while we weren't watching) and
	 * our localcnt is greater than primary's remotecnt (we were modified
	 * while primary wasn't watching).
	 * There are many possible combinations which are all gathered below.
	 * Don't pay too much attention to exact numbers, the more important
	 * is to compare them. We compare secondary's local with primary's
	 * remote and secondary's remote with primary's local.
	 * Note that every case where primary's localcnt is smaller than
	 * secondary's remotecnt and where secondary's localcnt is smaller than
	 * primary's remotecnt should be impossible in practice. We will perform
	 * full synchronization then. Those cases are marked with an asterisk.
	 * Regular synchronization means that only extents marked as dirty are
	 * synchronized.
	 *
	 * SECONDARY METADATA	PRIMARY METADATA
	 * local=3 remote=3	local=2 remote=2*  ?! Full sync from secondary.
	 * local=3 remote=3	local=2 remote=3*  ?! Full sync from primary.
	 * local=3 remote=3	local=2 remote=4*  ?! Full sync from primary.
	 * local=3 remote=3	local=3 remote=2   Primary is out-of-date,
	 *                                         regular sync from secondary.
	 * local=3 remote=3	local=3 remote=3   Regular sync just in case.
	 * local=3 remote=3	local=3 remote=4*  ?! Full sync from primary.
	 * local=3 remote=3	local=4 remote=2   Split-brain condition.
	 * local=3 remote=3	local=4 remote=3   Secondary out-of-date,
	 *                                         regular sync from primary.
	 * local=3 remote=3	local=4 remote=4*  ?! Full sync from primary.
	 */
	if (res->hr_resuid == 0) {
		/*
		 * Provider is used for the first time. Initialize everything.
		 */
		assert(res->hr_secondary_localcnt == 0);
		res->hr_resuid = resuid;
		if (metadata_write(res) < 0)
			exit(EX_NOINPUT);
		/* All bits set: ask for every extent to be synchronized. */
		memset(map, 0xff, mapsize);
		nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc");
	} else if (
	    /* Is primary out-of-date? */
	    (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
	     res->hr_secondary_remotecnt == res->hr_primary_localcnt) ||
	    /* Nodes are more or less in sync? */
	    (res->hr_secondary_localcnt == res->hr_primary_remotecnt &&
	     res->hr_secondary_remotecnt == res->hr_primary_localcnt) ||
	    /* Is secondary out-of-date? */
	    (res->hr_secondary_localcnt == res->hr_primary_remotecnt &&
	     res->hr_secondary_remotecnt < res->hr_primary_localcnt)) {
		/*
		 * Nodes are more or less in sync or one of the nodes is
		 * out-of-date.
		 * It doesn't matter at this point which one, we just have to
		 * send out local bitmap to the remote node.
		 */
		if (pread(res->hr_localfd, map, mapsize, METADATA_SIZE) !=
		    (ssize_t)mapsize) {
			/*
			 * First argument is the exit code, not a syslog
			 * level; use the same code as for unreadable
			 * metadata.
			 */
			pjdlog_exit(EX_NOINPUT, "Unable to read activemap");
		}
		if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
		    res->hr_secondary_remotecnt == res->hr_primary_localcnt) {
			/* Primary is out-of-date, sync from secondary. */
			nv_add_uint8(nvout, HAST_SYNCSRC_SECONDARY, "syncsrc");
		} else {
			/*
			 * Secondary is out-of-date or counts match.
			 * Sync from primary.
			 */
			nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc");
		}
	} else if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
	    res->hr_primary_localcnt > res->hr_secondary_remotecnt) {
		/*
		 * Not good, we have split-brain condition.
		 * Only the error message is sent, without any map.
		 */
		pjdlog_error("Split-brain detected, exiting.");
		nv_add_string(nvout, "Split-brain condition!", "errmsg");
		free(map);
		map = NULL;
		mapsize = 0;
	} else /* if (res->hr_secondary_localcnt < res->hr_primary_remotecnt ||
	    res->hr_primary_localcnt < res->hr_secondary_remotecnt) */ {
		/*
		 * This should never happen in practice, but we will perform
		 * full synchronization.
		 */
		assert(res->hr_secondary_localcnt < res->hr_primary_remotecnt ||
		    res->hr_primary_localcnt < res->hr_secondary_remotecnt);
		mapsize = activemap_calc_ondisk_size(res->hr_local_mediasize -
		    METADATA_SIZE, res->hr_extentsize,
		    res->hr_local_sectorsize);
		memset(map, 0xff, mapsize);
		if (res->hr_secondary_localcnt > res->hr_primary_remotecnt) {
			/* In this one of five cases sync from secondary. */
			nv_add_uint8(nvout, HAST_SYNCSRC_SECONDARY, "syncsrc");
		} else {
			/* For the rest four cases sync from primary. */
			nv_add_uint8(nvout, HAST_SYNCSRC_PRIMARY, "syncsrc");
		}
		pjdlog_warning("This should never happen, asking for full "
		    "synchronization (primary(local=%ju, remote=%ju), "
		    "secondary(local=%ju, remote=%ju)).",
		    (uintmax_t)res->hr_primary_localcnt,
		    (uintmax_t)res->hr_primary_remotecnt,
		    (uintmax_t)res->hr_secondary_localcnt,
		    (uintmax_t)res->hr_secondary_remotecnt);
	}
	if (hast_proto_send(res, res->hr_remotein, nvout, map, mapsize) < 0) {
		pjdlog_errno(LOG_WARNING, "Unable to send activemap to %s",
		    res->hr_remoteaddr);
		nv_free(nvout);
		exit(EX_TEMPFAIL);
	}
	/*
	 * Neither the header nor the map is needed once they are sent.
	 * The map is NULL already in the split-brain case.
	 */
	nv_free(nvout);
	free(map);
	if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
	    res->hr_primary_localcnt > res->hr_secondary_remotecnt) {
		/* Exit on split-brain. */
		exit(EX_CONFIG);
	}
}
/*
 * Start a worker process serving the given resource in secondary role.
 *
 * The parent closes its copies of the connections to the remote node,
 * remembers the worker's PID and returns. The child initializes itself,
 * starts the recv, disk and send threads and then runs ctrl_thread()
 * itself.
 */
void
hastd_secondary(struct hast_resource *res, struct nv *nvin)
{
	void *(*const workers[])(void *) = {
		recv_thread, disk_thread, send_thread
	};
	pthread_t td;
	pid_t child;
	size_t i;
	int error;

	/*
	 * Create communication channel between parent and child.
	 */
	if (proto_client("socketpair://", &res->hr_ctrl) < 0) {
		KEEP_ERRNO((void)pidfile_remove(pfh));
		pjdlog_exit(EX_OSERR,
		    "Unable to create control sockets between parent and child");
	}

	child = fork();
	if (child < 0) {
		KEEP_ERRNO((void)pidfile_remove(pfh));
		pjdlog_exit(EX_OSERR, "Unable to fork");
	}
	if (child > 0) {
		/* This is parent. */
		proto_close(res->hr_remotein);
		res->hr_remotein = NULL;
		proto_close(res->hr_remoteout);
		res->hr_remoteout = NULL;
		res->hr_workerpid = child;
		return;
	}

	/* This is child. */
	(void)pidfile_close(pfh);

	setproctitle("%s (secondary)", res->hr_name);

	init_local(res);
	init_remote(res, nvin);
	init_environment();

	for (i = 0; i < sizeof(workers) / sizeof(workers[0]); i++) {
		error = pthread_create(&td, NULL, workers[i], res);
		assert(error == 0);
	}
	(void)ctrl_thread(res);
}
/*
 * Log a message about the given request. The message consists of the
 * caller's printf-like text followed by a short description of the request
 * (command with its offset and length, where applicable). The description
 * is skipped if the caller's text alone does not fit into the buffer.
 */
static void
reqlog(int loglevel, int debuglevel, int error, struct hio *hio, const char *fmt, ...)
{
	char msg[1024];
	char *tail;
	size_t room;
	va_list ap;
	int len;

	va_start(ap, fmt);
	len = vsnprintf(msg, sizeof(msg), fmt, ap);
	va_end(ap);

	if (len >= 0 && (size_t)len < sizeof(msg)) {
		/* Append the description right after the caller's text. */
		tail = msg + len;
		room = sizeof(msg) - len;
		switch (hio->hio_cmd) {
		case HIO_READ:
			(void)snprintf(tail, room, "READ(%ju, %ju).",
			    (uintmax_t)hio->hio_offset,
			    (uintmax_t)hio->hio_length);
			break;
		case HIO_DELETE:
			(void)snprintf(tail, room, "DELETE(%ju, %ju).",
			    (uintmax_t)hio->hio_offset,
			    (uintmax_t)hio->hio_length);
			break;
		case HIO_FLUSH:
			(void)snprintf(tail, room, "FLUSH.");
			break;
		case HIO_WRITE:
			(void)snprintf(tail, room, "WRITE(%ju, %ju).",
			    (uintmax_t)hio->hio_offset,
			    (uintmax_t)hio->hio_length);
			break;
		default:
			(void)snprintf(tail, room, "UNKNOWN(%u).",
			    (unsigned int)hio->hio_cmd);
			break;
		}
	}
	pjdlog_common(loglevel, debuglevel, error, "%s", msg);
}
static int
requnpack(struct hast_resource *res, struct hio *hio)
{
hio->hio_cmd = nv_get_uint8(hio->hio_nv, "cmd");
if (hio->hio_cmd == 0) {
pjdlog_error("Header contains no 'cmd' field.");
hio->hio_error = EINVAL;
goto end;
}
switch (hio->hio_cmd) {
case HIO_READ:
case HIO_WRITE:
case HIO_DELETE:
hio->hio_offset = nv_get_uint64(hio->hio_nv, "offset");
if (nv_error(hio->hio_nv) != 0) {
pjdlog_error("Header is missing 'offset' field.");
hio->hio_error = EINVAL;
goto end;
}
hio->hio_length = nv_get_uint64(hio->hio_nv, "length");
if (nv_error(hio->hio_nv) != 0) {
pjdlog_error("Header is missing 'length' field.");
hio->hio_error = EINVAL;
goto end;
}
if (hio->hio_length == 0) {
pjdlog_error("Data length is zero.");
hio->hio_error = EINVAL;
goto end;
}
if (hio->hio_length > MAXPHYS) {
pjdlog_error("Data length is too large (%ju > %ju).",
(uintmax_t)hio->hio_length, (uintmax_t)MAXPHYS);
hio->hio_error = EINVAL;
goto end;
}
if ((hio->hio_offset % res->hr_local_sectorsize) != 0) {
pjdlog_error("Offset %ju is not multiple of sector size.",
(uintmax_t)hio->hio_offset);
hio->hio_error = EINVAL;
goto end;
}
if ((hio->hio_length % res->hr_local_sectorsize) != 0) {
pjdlog_error("Length %ju is not multiple of sector size.",
(uintmax_t)hio->hio_length);
hio->hio_error = EINVAL;
goto end;
}
if (hio->hio_offset + hio->hio_length >
(uint64_t)res->hr_datasize) {
pjdlog_error("Data offset is too large (%ju > %ju).",
(uintmax_t)(hio->hio_offset + hio->hio_length),
(uintmax_t)res->hr_datasize);
hio->hio_error = EINVAL;
goto end;
}
break;
default:
pjdlog_error("Header contains invalid 'cmd' (%hhu).",
hio->hio_cmd);
hio->hio_error = EINVAL;
goto end;
}
hio->hio_error = 0;
end:
return (hio->hio_error);
}
/*
 * Thread receives requests from the primary node.
 *
 * For every request a free hio structure is taken (waiting for one if
 * necessary), the header is received and validated and, for WRITE
 * requests, the data is received as well. Valid requests are passed to
 * the disk queue; requests that failed validation go straight to the send
 * queue with hio_error set. Failure to receive from the primary is fatal
 * for the process.
 */
static void *
recv_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct hio *hio;
	bool wakeup;

	for (;;) {
		pjdlog_debug(2, "recv: Taking free request.");
		mtx_lock(&hio_free_list_lock);
		while ((hio = TAILQ_FIRST(&hio_free_list)) == NULL) {
			pjdlog_debug(2, "recv: No free requests, waiting.");
			cv_wait(&hio_free_list_cond, &hio_free_list_lock);
		}
		TAILQ_REMOVE(&hio_free_list, hio, hio_next);
		mtx_unlock(&hio_free_list_lock);
		pjdlog_debug(2, "recv: (%p) Got request.", hio);
		if (hast_proto_recv_hdr(res->hr_remotein, &hio->hio_nv) < 0) {
			pjdlog_exit(EX_TEMPFAIL,
			    "Unable to receive request header");
		}
		if (requnpack(res, hio) != 0)
			goto send_queue;
		reqlog(LOG_DEBUG, 2, -1, hio,
		    "recv: (%p) Got request header: ", hio);
		if (hio->hio_cmd == HIO_WRITE) {
			/* Only WRITE requests are followed by data. */
			if (hast_proto_recv_data(res, res->hr_remotein,
			    hio->hio_nv, hio->hio_data, MAXPHYS) < 0) {
				pjdlog_exit(EX_TEMPFAIL,
				    "Unable to receive request data");
			}
		}
		pjdlog_debug(2, "recv: (%p) Moving request to the disk queue.",
		    hio);
		mtx_lock(&hio_disk_list_lock);
		/* The consumer needs to be woken up only if the list was empty. */
		wakeup = TAILQ_EMPTY(&hio_disk_list);
		TAILQ_INSERT_TAIL(&hio_disk_list, hio, hio_next);
		mtx_unlock(&hio_disk_list_lock);
		if (wakeup)
			cv_signal(&hio_disk_list_cond);
		continue;
send_queue:
		pjdlog_debug(2, "recv: (%p) Moving request to the send queue.",
		    hio);
		mtx_lock(&hio_send_list_lock);
		wakeup = TAILQ_EMPTY(&hio_send_list);
		TAILQ_INSERT_TAIL(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);
		if (wakeup)
			cv_signal(&hio_send_list_cond);
	}
	/* NOTREACHED */
	return (NULL);
}
/*
 * Thread reads from or writes to local component and also handles DELETE and
 * FLUSH requests.
 *
 * Requests are taken from the disk list, executed against res->hr_localfd and
 * then handed to the send list with hio_error holding 0 or an errno value.
 * Until it succeeds once, every request first triggers an attempt to overwrite
 * the on-disk activemap with zeros.
 */
static void *
disk_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct hio *hio;
	ssize_t ret;
	bool clear_activemap, wakeup;

	clear_activemap = true;

	for (;;) {
		pjdlog_debug(2, "disk: Taking request.");
		mtx_lock(&hio_disk_list_lock);
		for (;;) {
			hio = TAILQ_FIRST(&hio_disk_list);
			if (hio != NULL)
				break;
			pjdlog_debug(2, "disk: No requests, waiting.");
			cv_wait(&hio_disk_list_cond, &hio_disk_list_lock);
		}
		TAILQ_REMOVE(&hio_disk_list, hio, hio_next);
		mtx_unlock(&hio_disk_list_lock);

		if (clear_activemap) {
			unsigned char *zeromap;
			size_t zerosize;

			/*
			 * When first request is received, it means that primary
			 * already received our activemap, merged it and stored
			 * locally. We can now safely clear our activemap.
			 * On failure the flag stays set and the attempt is
			 * repeated with the next request.
			 */
			zerosize = activemap_calc_ondisk_size(
			    res->hr_local_mediasize - METADATA_SIZE,
			    res->hr_extentsize, res->hr_local_sectorsize);
			zeromap = calloc(1, zerosize);
			if (zeromap == NULL) {
				pjdlog_warning("Unable to allocate memory to clear local activemap.");
			} else if (pwrite(res->hr_localfd, zeromap, zerosize,
			    METADATA_SIZE) != (ssize_t)zerosize) {
				/* Log before free() so errno is still intact. */
				pjdlog_errno(LOG_WARNING,
				    "Unable to store cleared activemap");
				free(zeromap);
			} else {
				free(zeromap);
				clear_activemap = false;
				pjdlog_debug(1, "Local activemap cleared.");
			}
		}

		reqlog(LOG_DEBUG, 2, -1, hio, "disk: (%p) Got request: ", hio);

		/* Handle the actual request. */
		switch (hio->hio_cmd) {
		case HIO_READ:
			ret = pread(res->hr_localfd, hio->hio_data,
			    hio->hio_length,
			    hio->hio_offset + res->hr_localoff);
			if (ret < 0) {
				hio->hio_error = errno;
			} else {
				/* A short read is reported as EIO. */
				hio->hio_error =
				    (ret != (int64_t)hio->hio_length) ? EIO : 0;
			}
			break;
		case HIO_WRITE:
			ret = pwrite(res->hr_localfd, hio->hio_data,
			    hio->hio_length,
			    hio->hio_offset + res->hr_localoff);
			if (ret < 0) {
				hio->hio_error = errno;
			} else {
				/* A short write is reported as EIO. */
				hio->hio_error =
				    (ret != (int64_t)hio->hio_length) ? EIO : 0;
			}
			break;
		case HIO_DELETE:
			ret = g_delete(res->hr_localfd,
			    hio->hio_offset + res->hr_localoff,
			    hio->hio_length);
			hio->hio_error = (ret < 0) ? errno : 0;
			break;
		case HIO_FLUSH:
			ret = g_flush(res->hr_localfd);
			hio->hio_error = (ret < 0) ? errno : 0;
			break;
		}
		if (hio->hio_error != 0) {
			reqlog(LOG_ERR, 0, hio->hio_error, hio,
			    "Request failed: ");
		}

		pjdlog_debug(2, "disk: (%p) Moving request to the send queue.",
		    hio);
		mtx_lock(&hio_send_list_lock);
		/* The sender only sleeps on an empty list. */
		wakeup = TAILQ_EMPTY(&hio_send_list);
		TAILQ_INSERT_TAIL(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);
		if (wakeup)
			cv_signal(&hio_send_list_cond);
	}
	/* NOTREACHED */
	return (NULL);
}
/*
 * Thread sends requests back to primary node.
 *
 * Every iteration takes one hio off the send list, builds a reply that
 * carries the request's "seq" number and, when hio_error is non-zero, an
 * "error" field, and sends it.  Data is attached only to a successful READ.
 * Afterwards the request's nv is freed, hio_error is reset and the hio is
 * returned to the free list.
 *
 * A failed request is answered with the error no matter what its command
 * is: requnpack() queues requests with an unrecognized 'cmd' here, and they
 * must be reported to the primary rather than hit abort().
 */
static void *
send_thread(void *arg)
{
	struct hast_resource *res = arg;
	struct nv *nvout;
	struct hio *hio;
	void *data;
	size_t length;
	bool wakeup;

	for (;;) {
		pjdlog_debug(2, "send: Taking request.");
		mtx_lock(&hio_send_list_lock);
		while ((hio = TAILQ_FIRST(&hio_send_list)) == NULL) {
			pjdlog_debug(2, "send: No requests, waiting.");
			cv_wait(&hio_send_list_cond, &hio_send_list_lock);
		}
		TAILQ_REMOVE(&hio_send_list, hio, hio_next);
		mtx_unlock(&hio_send_list_lock);
		reqlog(LOG_DEBUG, 2, -1, hio, "send: (%p) Got request: ", hio);
		nvout = nv_alloc();
		/* Copy sequence number. */
		nv_add_uint64(nvout, nv_get_uint64(hio->hio_nv, "seq"), "seq");
		if (hio->hio_error != 0) {
			/*
			 * We send no data in case of an error.
			 */
			data = NULL;
			length = 0;
			nv_add_int16(nvout, hio->hio_error, "error");
		} else {
			switch (hio->hio_cmd) {
			case HIO_READ:
				data = hio->hio_data;
				length = hio->hio_length;
				break;
			case HIO_DELETE:
			case HIO_FLUSH:
			case HIO_WRITE:
				data = NULL;
				length = 0;
				break;
			default:
				/* A successful request always has a known cmd. */
				abort();
				break;
			}
		}
		if (hast_proto_send(res, res->hr_remoteout, nvout, data,
		    length) < 0) {
			pjdlog_exit(EX_TEMPFAIL, "Unable to send reply.");
		}
		nv_free(nvout);
		pjdlog_debug(2, "send: (%p) Moving request to the free queue.",
		    hio);
		nv_free(hio->hio_nv);
		hio->hio_error = 0;
		mtx_lock(&hio_free_list_lock);
		/* The receiver only sleeps on an empty list. */
		wakeup = TAILQ_EMPTY(&hio_free_list);
		TAILQ_INSERT_TAIL(&hio_free_list, hio, hio_next);
		mtx_unlock(&hio_free_list_lock);
		if (wakeup)
			cv_signal(&hio_free_list_cond);
	}
	/* NOTREACHED */
	return (NULL);
}

118
sbin/hastd/subr.c Normal file
View File

@ -0,0 +1,118 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/disk.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <assert.h>
#include <errno.h>
#include <fcntl.h>
#include <pjdlog.h>
#include "hast.h"
#include "subr.h"
/*
 * Open the local component of the resource, unless hr_localfd is already
 * set, and record its media size and sector size in res.
 *
 * The component is opened read-write when dowrite is true and read-only
 * otherwise.  Character devices are queried with the DIOCGMEDIASIZE and
 * DIOCGSECTORSIZE ioctls; regular files use st_size and a fixed 512-byte
 * sector.  Any other file type is rejected with errno set to EFTYPE.
 *
 * Returns 0 on success and -1 on failure with errno describing the failure.
 */
int
provinfo(struct hast_resource *res, bool dowrite)
{
	struct stat st;
	int oflags;

	assert(res->hr_localpath != NULL && res->hr_localpath[0] != '\0');

	if (res->hr_localfd == -1) {
		oflags = dowrite ? O_RDWR : O_RDONLY;
		res->hr_localfd = open(res->hr_localpath, oflags);
		if (res->hr_localfd < 0) {
			/* KEEP_ERRNO: logging must not clobber open()'s errno. */
			KEEP_ERRNO(pjdlog_errno(LOG_ERR, "Unable to open %s",
			    res->hr_localpath));
			return (-1);
		}
	}
	if (fstat(res->hr_localfd, &st) < 0) {
		KEEP_ERRNO(pjdlog_errno(LOG_ERR, "Unable to stat %s",
		    res->hr_localpath));
		return (-1);
	}

	if (S_ISREG(st.st_mode)) {
		/*
		 * We also support regular files for which we hardcode
		 * sector size of 512 bytes.
		 */
		res->hr_local_mediasize = st.st_size;
		res->hr_local_sectorsize = 512;
		return (0);
	}
	if (!S_ISCHR(st.st_mode)) {
		/*
		 * We support no other file types.
		 */
		pjdlog_error("%s is neither GEOM provider nor regular file.",
		    res->hr_localpath);
		errno = EFTYPE;
		return (-1);
	}

	/*
	 * If this is character device, it is most likely GEOM provider.
	 */
	if (ioctl(res->hr_localfd, DIOCGMEDIASIZE,
	    &res->hr_local_mediasize) < 0) {
		KEEP_ERRNO(pjdlog_errno(LOG_ERR,
		    "Unable obtain provider %s mediasize",
		    res->hr_localpath));
		return (-1);
	}
	if (ioctl(res->hr_localfd, DIOCGSECTORSIZE,
	    &res->hr_local_sectorsize) < 0) {
		KEEP_ERRNO(pjdlog_errno(LOG_ERR,
		    "Unable obtain provider %s sectorsize",
		    res->hr_localpath));
		return (-1);
	}
	return (0);
}
/*
 * Map a HAST_ROLE_* value to its lower-case name.
 * Any value other than INIT, PRIMARY or SECONDARY yields "unknown".
 * The returned string is a literal and must not be freed.
 */
const char *
role2str(int role)
{
	const char *name;

	if (role == HAST_ROLE_INIT)
		name = "init";
	else if (role == HAST_ROLE_PRIMARY)
		name = "primary";
	else if (role == HAST_ROLE_SECONDARY)
		name = "secondary";
	else
		name = "unknown";
	return (name);
}

51
sbin/hastd/subr.h Normal file
View File

@ -0,0 +1,51 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SUBR_H_
#define _SUBR_H_
#include <sys/types.h>
#include <stdbool.h>
#include "hast.h"
/*
 * Run the statement or expression `work' and then put errno back to the
 * value it had beforehand, so that whatever `work' does to errno is not
 * visible to the code that follows.
 */
#define	KEEP_ERRNO(work)	do {					\
	const int _saved_errno = errno;					\
									\
	work;								\
	errno = _saved_errno;						\
} while (0)
int provinfo(struct hast_resource *res, bool dowrite);
const char *role2str(int role);
#endif /* !_SUBR_H_ */

162
sbin/hastd/synch.h Normal file
View File

@ -0,0 +1,162 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYNCH_H_
#define _SYNCH_H_
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <time.h>
/*
 * Initialize a mutex with default attributes.
 * A failure trips an assertion.
 */
static __inline void
mtx_init(pthread_mutex_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_mutex_init(lock, NULL);

	assert(rc == 0);
}
/*
 * Acquire a mutex, blocking until it is available.
 * A failure trips an assertion.
 */
static __inline void
mtx_lock(pthread_mutex_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_mutex_lock(lock);

	assert(rc == 0);
}
/*
 * Try to acquire a mutex without blocking.
 * Returns true when the mutex was acquired and false when it is busy.
 * Any other failure trips an assertion.
 */
static __inline bool
mtx_trylock(pthread_mutex_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_mutex_trylock(lock);

	assert(rc == 0 || rc == EBUSY);
	return (rc == 0);
}
/*
 * Release a mutex.
 * A failure trips an assertion.
 */
static __inline void
mtx_unlock(pthread_mutex_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_mutex_unlock(lock);

	assert(rc == 0);
}
/*
 * Initialize a read-write lock with default attributes.
 * A failure trips an assertion.
 */
static __inline void
rw_init(pthread_rwlock_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_rwlock_init(lock, NULL);

	assert(rc == 0);
}
/*
 * Acquire a read-write lock for reading (shared).
 * A failure trips an assertion.
 */
static __inline void
rw_rlock(pthread_rwlock_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_rwlock_rdlock(lock);

	assert(rc == 0);
}
/*
 * Acquire a read-write lock for writing (exclusive).
 * A failure trips an assertion.
 */
static __inline void
rw_wlock(pthread_rwlock_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_rwlock_wrlock(lock);

	assert(rc == 0);
}
/*
 * Release a read-write lock held for reading or for writing.
 * A failure trips an assertion.
 */
static __inline void
rw_unlock(pthread_rwlock_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_rwlock_unlock(lock);

	assert(rc == 0);
}
/*
 * Initialize a condition variable whose timed waits are measured against
 * CLOCK_MONOTONIC, matching the clock cv_timedwait() reads.
 * A failure of any step trips an assertion.
 */
static __inline void
cv_init(pthread_cond_t *cv)
{
	pthread_condattr_t attr;
	int error;

	error = pthread_condattr_init(&attr);
	assert(error == 0);
	error = pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
	assert(error == 0);
	error = pthread_cond_init(cv, &attr);
	assert(error == 0);
	/*
	 * The attributes object is no longer needed once the condition
	 * variable exists; destroy it so its resources are released.
	 */
	error = pthread_condattr_destroy(&attr);
	assert(error == 0);
}
/*
 * Block on a condition variable.  The caller must hold `lock'; it is passed
 * to pthread_cond_wait() together with `cv'.
 * A failure trips an assertion.
 */
static __inline void
cv_wait(pthread_cond_t *cv, pthread_mutex_t *lock)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_cond_wait(cv, lock);

	assert(rc == 0);
}
/*
 * Block on a condition variable for at most `timeout' seconds.
 *
 * A timeout of 0 means wait without a time limit.  The deadline is computed
 * from CLOCK_MONOTONIC, so `cv' has to use that clock as well (cv_init()
 * sets it up that way).
 *
 * Returns true when the wait timed out and false otherwise.
 */
static __inline bool
cv_timedwait(pthread_cond_t *cv, pthread_mutex_t *lock, int timeout)
{
	struct timespec deadline;
	int rc;

	if (timeout != 0) {
		rc = clock_gettime(CLOCK_MONOTONIC, &deadline);
		assert(rc == 0);
		deadline.tv_sec += timeout;
		rc = pthread_cond_timedwait(cv, lock, &deadline);
		assert(rc == 0 || rc == ETIMEDOUT);
		return (rc == ETIMEDOUT);
	}

	cv_wait(cv, lock);
	return (false);
}
/*
 * Wake up one thread waiting on the condition variable, if there is any.
 * A failure trips an assertion.
 */
static __inline void
cv_signal(pthread_cond_t *cv)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_cond_signal(cv);

	assert(rc == 0);
}
/*
 * Wake up every thread waiting on the condition variable.
 * A failure trips an assertion.
 */
static __inline void
cv_broadcast(pthread_cond_t *cv)
{
	/* The call stays outside assert() so it survives NDEBUG builds. */
	int rc = pthread_cond_broadcast(cv);

	assert(rc == 0);
}
#endif /* !_SYNCH_H_ */

66
sbin/hastd/token.l Normal file
View File

@ -0,0 +1,66 @@
%{
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
* the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdio.h>
#include <string.h>
#include "hast.h"
#include "y.tab.h"
/* Brace nesting level: incremented on '{' and decremented on '}'. */
int depth;
/* Count of newline characters matched by the "\n" rule below. */
int lineno;
/*
 * Empty statement executed at the start of every token action.
 * NOTE(review): presumably a hook for debug printing -- confirm.
 */
#define DP do { } while (0)
%}
%%
	/*
	 * Rule order matters: lex takes the longest match and, between
	 * matches of equal length, the rule listed first.  The keywords and
	 * the number rule therefore come before the generic string rule,
	 * which would otherwise match the same text.
	 *
	 * NUM stores the converted value in yylval.num.  STR stores a
	 * strdup()ed copy of the text in yylval.str; nothing here frees it.
	 */
control { DP; return CONTROL; }
listen { DP; return LISTEN; }
port { DP; return PORT; }
replication { DP; return REPLICATION; }
resource { DP; return RESOURCE; }
name { DP; return NAME; }
local { DP; return LOCAL; }
remote { DP; return REMOTE; }
on { DP; return ON; }
fullsync { DP; return FULLSYNC; }
memsync { DP; return MEMSYNC; }
async { DP; return ASYNC; }
[0-9]+ { DP; yylval.num = atoi(yytext); return NUM; }
[a-zA-Z0-9\.\-_/\:]+ { DP; yylval.str = strdup(yytext); return STR; }
\{ { DP; depth++; return OB; }
\} { DP; depth--; return CB; }
#.*$ /* ignore comments */;
\n { lineno++; }
[ \t]+ /* ignore whitespace */;
%%

View File

@ -13,6 +13,7 @@ LDIRS= BSD_daemon \
drivers \
etc \
find_interface \
hast \
ibcs2 \
ipfw \
kld \
@ -69,6 +70,11 @@ XFILES= BSD_daemon/FreeBSD.pfa \
find_interface/Makefile \
find_interface/README \
find_interface/find_interface.c \
hast/ucarp.sh \
hast/ucarp_down.sh \
hast/ucarp_up.sh \
hast/vip-down.sh \
hast/vip-up.sh \
ibcs2/README \
ibcs2/hello.uu \
ipfw/change_rules.sh \

69
share/examples/hast/ucarp.sh Executable file
View File

@ -0,0 +1,69 @@
#!/bin/sh
#
# Copyright (c) 2010 The FreeBSD Foundation
# All rights reserved.
#
# This software was developed by Pawel Jakub Dawidek under sponsorship from
# the FreeBSD Foundation.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$
# Shared IP address, handed to ucarp with -a.
addr="10.99.0.3"
# Password for UCARP communication.
pass="password"
# First node IP and interface for UCARP communication.
nodea_srcip="10.99.0.1"
nodea_ifnet="bge0"
# Second node IP and interface for UCARP communication.
nodeb_srcip="10.99.0.2"
nodeb_ifnet="em3"

export PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

vhid="1"
upscript="/root/hast/sbin/hastd/vip-up.sh"
downscript="/root/hast/sbin/hastd/vip-down.sh"

# Start empty so that values inherited from the environment cannot be
# mistaken for a detected node.
srcip=""
ifnet=""
node=""

# Succeed when interface $1 has IPv4 address $2 configured.
# The address is matched as a fixed string so that its dots are not treated
# as regular expression wildcards.
has_addr()
{
	ifconfig "$1" 2>/dev/null | grep -qF "inet $2 "
}

if has_addr "${nodea_ifnet}" "${nodea_srcip}"; then
	srcip="${nodea_srcip}"
	ifnet="${nodea_ifnet}"
	node="node A"
fi

if has_addr "${nodeb_ifnet}" "${nodeb_srcip}"; then
	if [ -n "${srcip}" ] || [ -n "${ifnet}" ]; then
		echo "Unable to determine which node is this (both match)." >&2
		exit 1
	fi
	srcip="${nodeb_srcip}"
	ifnet="${nodeb_ifnet}"
	node="node B"
fi

if [ -z "${srcip}" ] || [ -z "${ifnet}" ]; then
	echo "Unable to determine which node is this (none match)." >&2
	exit 1
fi

# Every argument is quoted so that a value containing whitespace (the
# password in particular) is passed to ucarp as a single word.
ucarp -i "${ifnet}" -s "${srcip}" -v "${vhid}" -a "${addr}" -p "${pass}" -u "${upscript}" -d "${downscript}"

View File

@ -0,0 +1,98 @@
#!/bin/sh
#
# Copyright (c) 2010 The FreeBSD Foundation
# All rights reserved.
#
# This software was developed by Pawel Jakub Dawidek under sponsorship from
# the FreeBSD Foundation.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$
# Resource name as defined in /etc/hast.conf.
resource="test"
# Supported file system types: UFS, ZFS
fstype="UFS"
# ZFS pool name. Required only when fstype == ZFS.
pool="test"
# File system mount point. Required only when fstype == UFS.
mountpoint="/mnt/test"
# Name of HAST provider as defined in /etc/hast.conf.
# Required only when fstype == UFS.
device="/dev/hast/${resource}"

export PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

# Send message $2 to syslog with priority local0.$1 and tag "hast".
log()
{
	logger -p "local0.$1" -t hast "$2"
}

# Kill the UP script if it still runs in the background.  The first attempt
# uses SIGTERM, every following one SIGKILL.  The signal is sent to the
# process group whose ID equals the PID reported by pgrep.
sig="TERM"
for i in $(jot 30); do
	pgid=$(pgrep -f ucarp_up.sh | head -1)
	if [ -z "${pgid}" ]; then
		break
	fi
	kill -${sig} -- -${pgid}
	sig="KILL"
	sleep 1
done
if [ -n "${pgid}" ]; then
	log error "UCARP UP process for resource ${resource} is still running after 30 seconds."
	exit 1
fi
log debug "UCARP UP is not running."

case "${fstype}" in
UFS)
	if mount | egrep -q "^${device} on "; then
		# Forcibly unmount file system.
		if ! out=$(umount -f "${mountpoint}" 2>&1); then
			log error "Unable to unmount file system for resource ${resource}: ${out}."
			exit 1
		fi
		log debug "File system for resource ${resource} unmounted."
	fi
	;;
ZFS)
	if zpool list | egrep -q "^${pool} "; then
		# Forcibly export the pool.
		if ! out=$(zpool export -f "${pool}" 2>&1); then
			log error "Unable to export pool for resource ${resource}: ${out}."
			exit 1
		fi
		log debug "ZFS pool for resource ${resource} exported."
	fi
	;;
esac

# Change role to secondary for our resource.
if ! out=$(hastctl role secondary "${resource}" 2>&1); then
	log error "Unable to change to role to secondary for resource ${resource}: ${out}."
	exit 1
fi
log debug "Role for resource ${resource} changed to secondary."

log info "Successfully switched to secondary for resource ${resource}."

exit 0

105
share/examples/hast/ucarp_up.sh Executable file
View File

@ -0,0 +1,105 @@
#!/bin/sh
#
# Copyright (c) 2010 The FreeBSD Foundation
# All rights reserved.
#
# This software was developed by Pawel Jakub Dawidek under sponsorship from
# the FreeBSD Foundation.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$
# Resource name as defined in /etc/hast.conf.
resource="test"
# Supported file system types: UFS, ZFS
fstype="UFS"
# ZFS pool name. Required only when fstype == ZFS.
pool="test"
# File system mount point. Required only when fstype == UFS.
mountpoint="/mnt/test"
# Name of HAST provider as defined in /etc/hast.conf.
device="/dev/hast/${resource}"

export PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

# Send message $2 to syslog with priority local0.$1 and tag "hast".
log()
{
	logger -p "local0.$1" -t hast "$2"
}

# Succeed while a hastd secondary worker for our resource is running.
secondary_running()
{
	pgrep -f "hastd: ${resource} \(secondary\)" >/dev/null 2>&1
}

# If there is secondary worker process, it means that remote primary process is
# still running. We have to wait for it to terminate.
for i in $(jot 30); do
	if ! secondary_running; then
		break
	fi
	sleep 1
done
if secondary_running; then
	log error "Secondary process for resource ${resource} is still running after 30 seconds."
	exit 1
fi
log debug "Secondary process in not running."

# Change role to primary for our resource.
if ! out=$(hastctl role primary "${resource}" 2>&1); then
	log error "Unable to change to role to primary for resource ${resource}: ${out}."
	exit 1
fi

# Wait a few seconds (50 polls, 0.1s apart) for the provider to appear.
for i in $(jot 50); do
	if [ -c "${device}" ]; then
		break
	fi
	sleep 0.1
done
if [ ! -c "${device}" ]; then
	log error "Device ${device} didn't appear."
	exit 1
fi
log debug "Role for resource ${resource} changed to primary."

case "${fstype}" in
UFS)
	# Check the file system.
	if ! fsck -y -t ufs "${device}" >/dev/null 2>&1; then
		log error "File system check for resource ${resource} failed."
		exit 1
	fi
	log debug "File system check for resource ${resource} finished."
	# Mount the file system.
	if ! out=$(mount -t ufs "${device}" "${mountpoint}" 2>&1); then
		log error "File system mount for resource ${resource} failed: ${out}."
		exit 1
	fi
	log debug "File system for resource ${resource} mounted."
	;;
ZFS)
	# Import ZFS pool. Do it forcibly as it remembers hostid of
	# the other cluster node.
	if ! out=$(zpool import -f "${pool}" 2>&1); then
		log error "ZFS pool import for resource ${resource} failed: ${out}."
		exit 1
	fi
	log debug "ZFS pool for resource ${resource} imported."
	;;
esac

log info "Successfully switched to primary for resource ${resource}."

exit 0

View File

@ -0,0 +1,5 @@
#!/bin/sh
# $FreeBSD$

# Run the switch-to-secondary script in the foreground and report success
# regardless of its exit status.
downscript="/root/hast/sbin/hastd/ucarp_down.sh"

"${downscript}"
exit 0

7
share/examples/hast/vip-up.sh Executable file
View File

@ -0,0 +1,7 @@
#!/bin/sh
# $FreeBSD$

# Start the switch-to-primary script in the background and return at once.
# Job control is enabled only around the launch.
upscript="/root/hast/sbin/hastd/ucarp_up.sh"

set -m
"${upscript}" &
set +m
exit 0

View File

@ -24,7 +24,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd November 11, 2009
.Dd February 12, 2010
.Dt RC.CONF 5
.Os
.Sh NAME
@ -1746,6 +1746,27 @@ is set to
.Dq Li YES ,
these are the flags to pass to
.Xr inetd 8 .
.It Va hastd_enable
.Pq Vt bool
If set to
.Dq Li YES ,
run the
.Xr hastd 8
daemon.
.It Va hastd_program
.Pq Vt str
Path to
.Xr hastd 8
(default
.Pa /sbin/hastd ) .
.It Va hastd_flags
.Pq Vt str
If
.Va hastd_enable
is set to
.Dq Li YES ,
these are the flags to pass to
.Xr hastd 8 .
.It Va named_enable
.Pq Vt bool
If set to

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* Copyright (c) 2009-2010 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by Pawel Jakub Dawidek
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -53,9 +57,14 @@ static MALLOC_DEFINE(M_GATE, "gg_data", "GEOM Gate Data");
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, gate, CTLFLAG_RW, 0, "GEOM_GATE stuff");
static u_int g_gate_debug = 0;
SYSCTL_UINT(_kern_geom_gate, OID_AUTO, debug, CTLFLAG_RW, &g_gate_debug, 0,
static int g_gate_debug = 0;
TUNABLE_INT("kern.geom.gate.debug", &g_gate_debug);
SYSCTL_INT(_kern_geom_gate, OID_AUTO, debug, CTLFLAG_RW, &g_gate_debug, 0,
"Debug level");
static u_int g_gate_maxunits = 256;
TUNABLE_INT("kern.geom.gate.maxunits", &g_gate_maxunits);
SYSCTL_UINT(_kern_geom_gate, OID_AUTO, maxunits, CTLFLAG_RDTUN,
&g_gate_maxunits, 0, "Maximum number of ggate devices");
struct g_class g_gate_class = {
.name = G_GATE_CLASS_NAME,
@ -71,10 +80,9 @@ static struct cdevsw g_gate_cdevsw = {
};
static LIST_HEAD(, g_gate_softc) g_gate_list =
LIST_HEAD_INITIALIZER(g_gate_list);
static struct mtx g_gate_list_mtx;
static struct g_gate_softc **g_gate_units;
static u_int g_gate_nunits;
static struct mtx g_gate_units_lock;
static int
g_gate_destroy(struct g_gate_softc *sc, boolean_t force)
@ -84,13 +92,13 @@ g_gate_destroy(struct g_gate_softc *sc, boolean_t force)
struct bio *bp;
g_topology_assert();
mtx_assert(&g_gate_list_mtx, MA_OWNED);
mtx_assert(&g_gate_units_lock, MA_OWNED);
pp = sc->sc_provider;
if (!force && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
mtx_unlock(&g_gate_list_mtx);
mtx_unlock(&g_gate_units_lock);
return (EBUSY);
}
mtx_unlock(&g_gate_list_mtx);
mtx_unlock(&g_gate_units_lock);
mtx_lock(&sc->sc_queue_mtx);
if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0)
sc->sc_flags |= G_GATE_FLAG_DESTROY;
@ -125,14 +133,15 @@ g_gate_destroy(struct g_gate_softc *sc, boolean_t force)
}
mtx_unlock(&sc->sc_queue_mtx);
g_topology_unlock();
mtx_lock(&g_gate_list_mtx);
mtx_lock(&g_gate_units_lock);
/* One reference is ours. */
sc->sc_ref--;
while (sc->sc_ref > 0) {
msleep(&sc->sc_ref, &g_gate_list_mtx, 0, "gg:destroy", 0);
}
LIST_REMOVE(sc, sc_next);
mtx_unlock(&g_gate_list_mtx);
while (sc->sc_ref > 0)
msleep(&sc->sc_ref, &g_gate_units_lock, 0, "gg:destroy", 0);
g_gate_units[sc->sc_unit] = NULL;
KASSERT(g_gate_nunits > 0, ("negative g_gate_nunits?"));
g_gate_nunits--;
mtx_unlock(&g_gate_units_lock);
mtx_destroy(&sc->sc_queue_mtx);
g_topology_lock();
G_GATE_DEBUG(0, "Device %s destroyed.", gp->name);
@ -196,7 +205,7 @@ g_gate_start(struct bio *bp)
if (sc->sc_queue_count > sc->sc_queue_size) {
mtx_unlock(&sc->sc_queue_mtx);
G_GATE_LOGREQ(1, bp, "Queue full, request canceled.");
g_io_deliver(bp, EIO);
g_io_deliver(bp, ENOMEM);
return;
}
@ -211,18 +220,29 @@ g_gate_start(struct bio *bp)
}
static struct g_gate_softc *
g_gate_hold(u_int unit)
g_gate_hold(u_int unit, const char *name)
{
struct g_gate_softc *sc;
struct g_gate_softc *sc = NULL;
mtx_lock(&g_gate_list_mtx);
LIST_FOREACH(sc, &g_gate_list, sc_next) {
if (sc->sc_unit == unit)
mtx_lock(&g_gate_units_lock);
if (unit >= 0 && unit < g_gate_maxunits)
sc = g_gate_units[unit];
else if (unit == G_GATE_NAME_GIVEN) {
KASSERT(name != NULL, ("name is NULL"));
for (unit = 0; unit < g_gate_maxunits; unit++) {
if (g_gate_units[unit] == NULL)
continue;
if (strcmp(name,
g_gate_units[unit]->sc_provider->name) != 0) {
continue;
}
sc = g_gate_units[unit];
break;
}
}
if (sc != NULL)
sc->sc_ref++;
mtx_unlock(&g_gate_list_mtx);
mtx_unlock(&g_gate_units_lock);
return (sc);
}
@ -231,40 +251,34 @@ g_gate_release(struct g_gate_softc *sc)
{
g_topology_assert_not();
mtx_lock(&g_gate_list_mtx);
mtx_lock(&g_gate_units_lock);
sc->sc_ref--;
KASSERT(sc->sc_ref >= 0, ("Negative sc_ref for %s.", sc->sc_name));
if (sc->sc_ref == 0 && (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0) {
if (sc->sc_ref == 0 && (sc->sc_flags & G_GATE_FLAG_DESTROY) != 0)
wakeup(&sc->sc_ref);
mtx_unlock(&g_gate_list_mtx);
} else {
mtx_unlock(&g_gate_list_mtx);
}
mtx_unlock(&g_gate_units_lock);
}
static int
g_gate_getunit(int unit)
g_gate_getunit(int unit, int *errorp)
{
struct g_gate_softc *sc;
mtx_assert(&g_gate_list_mtx, MA_OWNED);
mtx_assert(&g_gate_units_lock, MA_OWNED);
if (unit >= 0) {
LIST_FOREACH(sc, &g_gate_list, sc_next) {
if (sc->sc_unit == unit)
return (-1);
}
if (unit >= g_gate_maxunits)
*errorp = EINVAL;
else if (g_gate_units[unit] == NULL)
return (unit);
else
*errorp = EEXIST;
} else {
unit = 0;
once_again:
LIST_FOREACH(sc, &g_gate_list, sc_next) {
if (sc->sc_unit == unit) {
if (++unit > 666)
return (-1);
goto once_again;
}
for (unit = 0; unit < g_gate_maxunits; unit++) {
if (g_gate_units[unit] == NULL)
return (unit);
}
*errorp = ENFILE;
}
return (unit);
return (-1);
}
static void
@ -276,7 +290,7 @@ g_gate_guard(void *arg)
sc = arg;
binuptime(&curtime);
g_gate_hold(sc->sc_unit);
g_gate_hold(sc->sc_unit, NULL);
mtx_lock(&sc->sc_queue_mtx);
TAILQ_FOREACH_SAFE(bp, &sc->sc_inqueue.queue, bio_queue, bp2) {
if (curtime.sec - bp->bio_t0.sec < 5)
@ -311,7 +325,7 @@ g_gate_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
sc = gp->softc;
if (sc == NULL || pp != NULL || cp != NULL)
return;
g_gate_hold(sc->sc_unit);
g_gate_hold(sc->sc_unit, NULL);
if ((sc->sc_flags & G_GATE_FLAG_READONLY) != 0) {
sbuf_printf(sb, "%s<access>%s</access>\n", indent, "read-only");
} else if ((sc->sc_flags & G_GATE_FLAG_WRITEONLY) != 0) {
@ -328,6 +342,7 @@ g_gate_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
sbuf_printf(sb, "%s<queue_size>%u</queue_size>\n", indent,
sc->sc_queue_size);
sbuf_printf(sb, "%s<ref>%u</ref>\n", indent, sc->sc_ref);
sbuf_printf(sb, "%s<unit>%d</unit>\n", indent, sc->sc_unit);
g_topology_unlock();
g_gate_release(sc);
g_topology_lock();
@ -339,6 +354,8 @@ g_gate_create(struct g_gate_ctl_create *ggio)
struct g_gate_softc *sc;
struct g_geom *gp;
struct g_provider *pp;
char name[NAME_MAX];
int error = 0, unit;
if (ggio->gctl_mediasize == 0) {
G_GATE_DEBUG(1, "Invalid media size.");
@ -357,15 +374,22 @@ g_gate_create(struct g_gate_ctl_create *ggio)
G_GATE_DEBUG(1, "Invalid flags.");
return (EINVAL);
}
if (ggio->gctl_unit < -1) {
if (ggio->gctl_unit != G_GATE_UNIT_AUTO &&
ggio->gctl_unit != G_GATE_NAME_GIVEN &&
ggio->gctl_unit < 0) {
G_GATE_DEBUG(1, "Invalid unit number.");
return (EINVAL);
}
if (ggio->gctl_unit == G_GATE_NAME_GIVEN &&
ggio->gctl_name[0] == '\0') {
G_GATE_DEBUG(1, "No device name.");
return (EINVAL);
}
sc = malloc(sizeof(*sc), M_GATE, M_WAITOK | M_ZERO);
sc->sc_flags = (ggio->gctl_flags & G_GATE_USERFLAGS);
strlcpy(sc->sc_info, ggio->gctl_info, sizeof(sc->sc_info));
sc->sc_seq = 0;
sc->sc_seq = 1;
bioq_init(&sc->sc_inqueue);
bioq_init(&sc->sc_outqueue);
mtx_init(&sc->sc_queue_mtx, "gg:queue", NULL, MTX_DEF);
@ -375,26 +399,44 @@ g_gate_create(struct g_gate_ctl_create *ggio)
sc->sc_queue_size = G_GATE_MAX_QUEUE_SIZE;
sc->sc_timeout = ggio->gctl_timeout;
callout_init(&sc->sc_callout, CALLOUT_MPSAFE);
mtx_lock(&g_gate_list_mtx);
ggio->gctl_unit = g_gate_getunit(ggio->gctl_unit);
if (ggio->gctl_unit == -1) {
mtx_unlock(&g_gate_list_mtx);
mtx_lock(&g_gate_units_lock);
sc->sc_unit = g_gate_getunit(ggio->gctl_unit, &error);
if (sc->sc_unit < 0) {
mtx_unlock(&g_gate_units_lock);
mtx_destroy(&sc->sc_queue_mtx);
free(sc, M_GATE);
return (EBUSY);
return (error);
}
sc->sc_unit = ggio->gctl_unit;
LIST_INSERT_HEAD(&g_gate_list, sc, sc_next);
mtx_unlock(&g_gate_list_mtx);
if (ggio->gctl_unit == G_GATE_NAME_GIVEN)
snprintf(name, sizeof(name), "%s", ggio->gctl_name);
else {
snprintf(name, sizeof(name), "%s%d", G_GATE_PROVIDER_NAME,
sc->sc_unit);
}
/* Check for name collision. */
for (unit = 0; unit < g_gate_maxunits; unit++) {
if (g_gate_units[unit] == NULL)
continue;
if (strcmp(name, g_gate_units[unit]->sc_provider->name) != 0)
continue;
mtx_unlock(&g_gate_units_lock);
mtx_destroy(&sc->sc_queue_mtx);
free(sc, M_GATE);
return (EEXIST);
}
g_gate_units[sc->sc_unit] = sc;
g_gate_nunits++;
mtx_unlock(&g_gate_units_lock);
ggio->gctl_unit = sc->sc_unit;
g_topology_lock();
gp = g_new_geomf(&g_gate_class, "%s%d", G_GATE_PROVIDER_NAME,
sc->sc_unit);
gp = g_new_geomf(&g_gate_class, "%s", name);
gp->start = g_gate_start;
gp->access = g_gate_access;
gp->dumpconf = g_gate_dumpconf;
gp->softc = sc;
pp = g_new_providerf(gp, "%s%d", G_GATE_PROVIDER_NAME, sc->sc_unit);
pp = g_new_providerf(gp, "%s", name);
pp->mediasize = ggio->gctl_mediasize;
pp->sectorsize = ggio->gctl_sectorsize;
sc->sc_provider = pp;
@ -446,11 +488,11 @@ g_gate_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct threa
struct g_gate_ctl_destroy *ggio = (void *)addr;
G_GATE_CHECK_VERSION(ggio);
sc = g_gate_hold(ggio->gctl_unit);
sc = g_gate_hold(ggio->gctl_unit, ggio->gctl_name);
if (sc == NULL)
return (ENXIO);
g_topology_lock();
mtx_lock(&g_gate_list_mtx);
mtx_lock(&g_gate_units_lock);
error = g_gate_destroy(sc, ggio->gctl_force);
g_topology_unlock();
if (error != 0)
@ -463,7 +505,7 @@ g_gate_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct threa
struct bio *tbp, *lbp;
G_GATE_CHECK_VERSION(ggio);
sc = g_gate_hold(ggio->gctl_unit);
sc = g_gate_hold(ggio->gctl_unit, ggio->gctl_name);
if (sc == NULL)
return (ENXIO);
lbp = NULL;
@ -491,6 +533,8 @@ g_gate_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct threa
break;
}
}
if (ggio->gctl_unit == G_GATE_NAME_GIVEN)
ggio->gctl_unit = sc->sc_unit;
mtx_unlock(&sc->sc_queue_mtx);
g_gate_release(sc);
return (error);
@ -500,7 +544,7 @@ g_gate_ioctl(struct cdev *dev, u_long cmd, caddr_t addr, int flags, struct threa
struct g_gate_ctl_io *ggio = (void *)addr;
G_GATE_CHECK_VERSION(ggio);
sc = g_gate_hold(ggio->gctl_unit);
sc = g_gate_hold(ggio->gctl_unit, NULL);
if (sc == NULL)
return (ENXIO);
error = 0;
@ -561,7 +605,7 @@ start_end:
struct g_gate_ctl_io *ggio = (void *)addr;
G_GATE_CHECK_VERSION(ggio);
sc = g_gate_hold(ggio->gctl_unit);
sc = g_gate_hold(ggio->gctl_unit, NULL);
if (sc == NULL)
return (ENOENT);
error = 0;
@ -631,20 +675,24 @@ g_gate_modevent(module_t mod, int type, void *data)
switch (type) {
case MOD_LOAD:
mtx_init(&g_gate_list_mtx, "gg_list_lock", NULL, MTX_DEF);
mtx_init(&g_gate_units_lock, "gg_units_lock", NULL, MTX_DEF);
g_gate_units = malloc(g_gate_maxunits * sizeof(g_gate_units[0]),
M_GATE, M_WAITOK | M_ZERO);
g_gate_nunits = 0;
g_gate_device();
break;
case MOD_UNLOAD:
mtx_lock(&g_gate_list_mtx);
if (!LIST_EMPTY(&g_gate_list)) {
mtx_unlock(&g_gate_list_mtx);
mtx_lock(&g_gate_units_lock);
if (g_gate_nunits > 0) {
mtx_unlock(&g_gate_units_lock);
error = EBUSY;
break;
}
mtx_unlock(&g_gate_list_mtx);
mtx_destroy(&g_gate_list_mtx);
mtx_unlock(&g_gate_units_lock);
mtx_destroy(&g_gate_units_lock);
if (status_dev != 0)
destroy_dev(status_dev);
free(g_gate_units, M_GATE);
break;
default:
return (EOPNOTSUPP);

View File

@ -1,5 +1,5 @@
/*-
* Copyright (c) 2004-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* Copyright (c) 2004-2009 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@ -41,7 +41,7 @@
#define G_GATE_MOD_NAME "ggate"
#define G_GATE_CTL_NAME "ggctl"
#define G_GATE_VERSION 1
#define G_GATE_VERSION 2
/*
* Maximum number of requests that can be stored in
@ -54,6 +54,15 @@
#define G_GATE_FLAG_DESTROY 0x1000
#define G_GATE_USERFLAGS (G_GATE_FLAG_READONLY | G_GATE_FLAG_WRITEONLY)
/*
* Pick unit number automatically in /dev/ggate<unit>.
*/
#define G_GATE_UNIT_AUTO (-1)
/*
* Full provider name is given, so don't use ggate<unit>.
*/
#define G_GATE_NAME_GIVEN (-2)
#define G_GATE_CMD_CREATE _IOWR('m', 0, struct g_gate_ctl_create)
#define G_GATE_CMD_DESTROY _IOWR('m', 1, struct g_gate_ctl_destroy)
#define G_GATE_CMD_CANCEL _IOWR('m', 2, struct g_gate_ctl_cancel)
@ -120,20 +129,23 @@ struct g_gate_ctl_create {
u_int gctl_flags;
u_int gctl_maxcount;
u_int gctl_timeout;
char gctl_name[NAME_MAX];
char gctl_info[G_GATE_INFOSIZE];
int gctl_unit; /* out */
int gctl_unit; /* in/out */
};
/*
 * Argument of the G_GATE_CMD_DESTROY ioctl: identifies one ggate device
 * (by unit number or by provider name) and requests its destruction.
 */
struct g_gate_ctl_destroy {
/* Interface version; verified with G_GATE_CHECK_VERSION() first. */
u_int gctl_version;
/*
 * Unit number of the device, or G_GATE_NAME_GIVEN to have the device
 * looked up by gctl_name instead.
 */
int gctl_unit;
/*
 * Passed to g_gate_destroy() as its "force" argument. When zero, a
 * provider that still has non-zero access counts (acr/acw/ace) is not
 * destroyed and EBUSY is returned.
 */
int gctl_force;
/*
 * Provider name; compared against existing provider names when
 * gctl_unit is G_GATE_NAME_GIVEN.
 */
char gctl_name[NAME_MAX];
};
/*
 * Argument of the G_GATE_CMD_CANCEL ioctl: identifies one ggate device
 * (by unit number or by provider name) whose queued request(s) are to be
 * cancelled.
 */
struct g_gate_ctl_cancel {
/* Interface version; verified with G_GATE_CHECK_VERSION() first. */
u_int gctl_version;
/*
 * Unit number of the device, or G_GATE_NAME_GIVEN to have the device
 * looked up by gctl_name instead.
 * NOTE(review): the ioctl handler that appears to serve this command
 * overwrites the field with the resolved unit number when
 * G_GATE_NAME_GIVEN was passed, so treat it as in/out -- confirm.
 */
int gctl_unit;
/*
 * NOTE(review): presumably the sequence number of the request to cancel;
 * the code consuming this field is not visible here -- confirm.
 */
uintptr_t gctl_seq;
/*
 * Provider name; compared against existing provider names when
 * gctl_unit is G_GATE_NAME_GIVEN.
 */
char gctl_name[NAME_MAX];
};
struct g_gate_ctl_io {