Implement configuration reload on SIGHUP. This includes:

- Load added resources.
- Stop and forget removed resources.
- Update modified resources in the least intrusive way, i.e. don't touch
  /dev/hast/<name> unless the path to the local component or the provider
  name was modified.

Obtained from:	Wheel Systems Sp. z o.o. http://www.wheelsystems.com
MFC after:	1 month
This commit is contained in:
Pawel Jakub Dawidek 2010-08-05 19:16:31 +00:00
parent c5e960de35
commit 0989854d45
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=210886
3 changed files with 322 additions and 12 deletions

View File

@ -1,5 +1,6 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
* Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
@ -57,13 +58,13 @@ __FBSDID("$FreeBSD$");
#include "subr.h"
/*
 * Path to configuration file.
 * Not static: it is declared extern in the shared header so that other
 * translation units can re-parse the same file.
 */
const char *cfgpath = HAST_CONFIG;
/* Hastd configuration. */
static struct hastd_config *cfg;
/* Was SIGCHLD signal received? */
static bool sigchld_received = false;
/*
 * Was SIGHUP signal received?
 * Not static: it is declared extern in the shared header.
 */
bool sighup_received = false;
/* Was SIGINT or SIGTERM signal received? */
bool sigexit_received = false;
/* PID file handle. */
@ -169,12 +170,203 @@ child_exit(void)
}
}
/*
 * Tell whether switching from the active resource configuration (res0) to
 * the newly parsed one (res1) requires a full restart of the resource.
 *
 * A restart is required when the provider name or the path to the local
 * component differs.  If the active role is INIT or SECONDARY, a different
 * remote address, replication mode or timeout requires a restart as well.
 *
 * Both arguments must describe the same resource (equal hr_name).
 */
static bool
resource_needs_restart(const struct hast_resource *res0,
    const struct hast_resource *res1)
{

	assert(strcmp(res0->hr_name, res1->hr_name) == 0);

	/* Changes to the local side always force a restart. */
	if (strcmp(res0->hr_provname, res1->hr_provname) != 0 ||
	    strcmp(res0->hr_localpath, res1->hr_localpath) != 0) {
		return (true);
	}

	/* For any other role the remote settings do not force a restart. */
	if (res0->hr_role != HAST_ROLE_INIT &&
	    res0->hr_role != HAST_ROLE_SECONDARY) {
		return (false);
	}

	return (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0 ||
	    res0->hr_replication != res1->hr_replication ||
	    res0->hr_timeout != res1->hr_timeout);
}
/*
 * Tell whether the active resource (res0) has to be reloaded to pick up
 * the settings of the newly parsed one (res1).
 *
 * Only a resource whose active role is PRIMARY is ever reloaded, and only
 * when its remote address, replication mode or timeout differs.
 *
 * Both arguments must describe the same resource with identical provider
 * name and local component path.
 */
static bool
resource_needs_reload(const struct hast_resource *res0,
    const struct hast_resource *res1)
{

	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);

	return (res0->hr_role == HAST_ROLE_PRIMARY &&
	    (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0 ||
	    res0->hr_replication != res1->hr_replication ||
	    res0->hr_timeout != res1->hr_timeout));
}
/*
 * Re-read the configuration file (cfgpath) and apply the differences to the
 * running configuration (cfg):
 *
 * - open new control/listen sockets if their addresses changed and switch
 *   to them only once both were opened successfully,
 * - stop and forget resources that are no longer configured,
 * - adopt resources that were added,
 * - restart or reload resources whose settings were modified.
 *
 * If the file cannot be parsed or a new socket cannot be opened, the running
 * configuration is left as it was and a warning is logged.
 */
static void
hastd_reload(void)
{
	struct hastd_config *newcfg;
	struct hast_resource *nres, *cres, *tres;
	uint8_t role;

	pjdlog_info("Reloading configuration...");

	newcfg = yy_config_parse(cfgpath, false);
	if (newcfg == NULL)
		goto failed;

	/*
	 * Check if control address has changed.
	 */
	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
		if (proto_server(newcfg->hc_controladdr,
		    &newcfg->hc_controlconn) < 0) {
			pjdlog_errno(LOG_ERR,
			    "Unable to listen on control address %s",
			    newcfg->hc_controladdr);
			goto failed;
		}
	}
	/*
	 * Check if listen address has changed.
	 */
	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
		if (proto_server(newcfg->hc_listenaddr,
		    &newcfg->hc_listenconn) < 0) {
			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
			    newcfg->hc_listenaddr);
			goto failed;
		}
	}
	/*
	 * Only when both control and listen sockets are successfully
	 * initialized switch them to new configuration.
	 * Ownership of each new connection moves to cfg, so the pointer in
	 * newcfg is cleared.
	 */
	if (newcfg->hc_controlconn != NULL) {
		pjdlog_info("Control socket changed from %s to %s.",
		    cfg->hc_controladdr, newcfg->hc_controladdr);
		proto_close(cfg->hc_controlconn);
		cfg->hc_controlconn = newcfg->hc_controlconn;
		newcfg->hc_controlconn = NULL;
		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
		    sizeof(cfg->hc_controladdr));
	}
	if (newcfg->hc_listenconn != NULL) {
		pjdlog_info("Listen socket changed from %s to %s.",
		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
		proto_close(cfg->hc_listenconn);
		cfg->hc_listenconn = newcfg->hc_listenconn;
		newcfg->hc_listenconn = NULL;
		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
		    sizeof(cfg->hc_listenaddr));
	}

	/*
	 * Stop and remove resources that were removed from the configuration.
	 */
	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
			if (strcmp(cres->hr_name, nres->hr_name) == 0)
				break;
		}
		if (nres == NULL) {
			control_set_role(cres, HAST_ROLE_INIT);
			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
			pjdlog_info("Resource %s removed.", cres->hr_name);
			free(cres);
		}
	}
	/*
	 * Move new resources to the current configuration.
	 */
	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
			if (strcmp(cres->hr_name, nres->hr_name) == 0)
				break;
		}
		if (cres == NULL) {
			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
			pjdlog_info("Resource %s added.", nres->hr_name);
		}
	}
	/*
	 * Deal with modified resources.
	 * Depending on what has changed exactly we might want to perform
	 * different actions.
	 *
	 * We do full resource restart in the following situations:
	 * Resource role is INIT or SECONDARY.
	 * Resource role is PRIMARY and path to local component or provider
	 * name has changed.
	 * In case of PRIMARY, the worker process will be killed and restarted,
	 * which also means removing /dev/hast/<name> provider and
	 * recreating it.
	 *
	 * We do just reload (send SIGHUP to worker process) if we act as
	 * PRIMARY, but only remote address, replication mode and/or timeout
	 * have changed. For those, there is no need to restart worker process.
	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
	 * replication mode has changed or simply set new timeout if only
	 * timeout has changed.
	 *
	 * Every resource still on newcfg's list at this point also exists in
	 * cfg, because the added ones were moved over by the loop above.
	 */
	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
			if (strcmp(cres->hr_name, nres->hr_name) == 0)
				break;
		}
		assert(cres != NULL);
		if (resource_needs_restart(cres, nres)) {
			pjdlog_info("Resource %s configuration was modified, restarting it.",
			    cres->hr_name);
			/* Remember the role so the new entry can resume it. */
			role = cres->hr_role;
			control_set_role(cres, HAST_ROLE_INIT);
			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
			free(cres);
			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
			control_set_role(nres, role);
		} else if (resource_needs_reload(cres, nres)) {
			pjdlog_info("Resource %s configuration was modified, reloading it.",
			    cres->hr_name);
			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
			    sizeof(cres->hr_remoteaddr));
			cres->hr_replication = nres->hr_replication;
			cres->hr_timeout = nres->hr_timeout;
			if (cres->hr_workerpid != 0) {
				if (kill(cres->hr_workerpid, SIGHUP) < 0) {
					pjdlog_errno(LOG_WARNING,
					    "Unable to send SIGHUP to worker process %u",
					    (unsigned int)cres->hr_workerpid);
				}
			}
		}
	}

	yy_config_free(newcfg);
	pjdlog_info("Configuration reloaded successfully.");
	return;
failed:
	if (newcfg != NULL) {
		/* Close sockets opened above that were never handed to cfg. */
		if (newcfg->hc_controlconn != NULL)
			proto_close(newcfg->hc_controlconn);
		if (newcfg->hc_listenconn != NULL)
			proto_close(newcfg->hc_listenconn);
		yy_config_free(newcfg);
	}
	pjdlog_warning("Configuration not reloaded.");
}
static void
@ -402,10 +594,6 @@ main_loop(void)
fd_set rfds, wfds;
int cfd, lfd, maxfd, ret;
cfd = proto_descriptor(cfg->hc_controlconn);
lfd = proto_descriptor(cfg->hc_listenconn);
maxfd = cfd > lfd ? cfd : lfd;
for (;;) {
if (sigchld_received) {
sigchld_received = false;
@ -416,6 +604,10 @@ main_loop(void)
hastd_reload();
}
cfd = proto_descriptor(cfg->hc_controlconn);
lfd = proto_descriptor(cfg->hc_listenconn);
maxfd = cfd > lfd ? cfd : lfd;
/* Setup descriptors for select(2). */
FD_ZERO(&rfds);
FD_SET(cfd, &rfds);

View File

@ -39,7 +39,8 @@
#include "hast.h"
/* Globals shared between the main hastd source and the worker code. */
extern const char *cfgpath;
extern bool sigexit_received;
extern bool sighup_received;
extern struct pidfh *pfh;
void hastd_primary(struct hast_resource *res);

View File

@ -1,5 +1,6 @@
/*-
* Copyright (c) 2009 The FreeBSD Foundation
* Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
@ -65,6 +66,9 @@ __FBSDID("$FreeBSD$");
#include "subr.h"
#include "synch.h"
/* There is only one remote component for now. */
#define ISREMOTE(no) ((no) == 1)
struct hio {
/*
* Number of components we are still waiting for.
@ -424,6 +428,7 @@ init_environment(struct hast_resource *res __unused)
*/
signal(SIGINT, sighandler);
signal(SIGTERM, sighandler);
signal(SIGHUP, sighandler);
}
static void
@ -1713,6 +1718,9 @@ sighandler(int sig)
case SIGTERM:
sigexit_received = true;
break;
case SIGHUP:
sighup_received = true;
break;
default:
assert(!"invalid condition");
}
@ -1726,6 +1734,114 @@ sighandler(int sig)
mtx_unlock(&hio_guard_lock);
}
/*
 * Re-read the configuration file (cfgpath) and apply changes of remote
 * address, replication mode and timeout to the resource served by this
 * process (gres).
 *
 * - If only the timeout changed, the new value is set on the existing
 *   remote connections without reconnecting.
 * - If the remote address or replication mode changed, the remote
 *   connections are closed with remote_close().
 * - If nothing changed, the connections are left untouched.
 *
 * If the resource is missing from the new configuration, or its provider
 * name or local component path differs, the signal is ignored and a hint to
 * signal the main hastd process is logged.
 */
static void
config_reload(void)
{
	struct hastd_config *newcfg;
	struct hast_resource *res;
	unsigned int ii, ncomps;
	int modified;

	pjdlog_info("Reloading configuration...");

	ncomps = HAST_NCOMPONENTS;

	newcfg = yy_config_parse(cfgpath, false);
	if (newcfg == NULL)
		goto failed;

	TAILQ_FOREACH(res, &newcfg->hc_resources, hr_next) {
		if (strcmp(res->hr_name, gres->hr_name) == 0)
			break;
	}
	/*
	 * If resource was removed from the configuration file, resource
	 * name, provider name or path to local component was modified we
	 * shouldn't be here. This means that someone modified configuration
	 * file and sent SIGHUP to us instead of main hastd process.
	 * Log advice and ignore the signal.
	 */
	if (res == NULL || strcmp(gres->hr_name, res->hr_name) != 0 ||
	    strcmp(gres->hr_provname, res->hr_provname) != 0 ||
	    strcmp(gres->hr_localpath, res->hr_localpath) != 0) {
		pjdlog_warning("To reload configuration send SIGHUP to the main hastd process (pid %u).",
		    (unsigned int)getppid());
		goto failed;
	}

#define	MODIFIED_REMOTEADDR	0x1
#define	MODIFIED_REPLICATION	0x2
#define	MODIFIED_TIMEOUT	0x4
	modified = 0;
	if (strcmp(gres->hr_remoteaddr, res->hr_remoteaddr) != 0) {
		/*
		 * Don't copy res->hr_remoteaddr to gres just yet.
		 * We want remote_close() to log disconnect from the old
		 * addresses, not from the new ones.
		 */
		modified |= MODIFIED_REMOTEADDR;
	}
	if (gres->hr_replication != res->hr_replication) {
		gres->hr_replication = res->hr_replication;
		modified |= MODIFIED_REPLICATION;
	}
	if (gres->hr_timeout != res->hr_timeout) {
		gres->hr_timeout = res->hr_timeout;
		modified |= MODIFIED_TIMEOUT;
	}
	/*
	 * If only timeout was modified we only need to change it without
	 * reconnecting.
	 */
	if (modified == MODIFIED_TIMEOUT) {
		for (ii = 0; ii < ncomps; ii++) {
			if (!ISREMOTE(ii))
				continue;
			rw_rlock(&hio_remote_lock[ii]);
			if (!ISCONNECTED(gres, ii)) {
				rw_unlock(&hio_remote_lock[ii]);
				continue;
			}
			rw_unlock(&hio_remote_lock[ii]);
			if (proto_timeout(gres->hr_remotein,
			    gres->hr_timeout) < 0) {
				pjdlog_errno(LOG_WARNING,
				    "Unable to set connection timeout");
			}
			if (proto_timeout(gres->hr_remoteout,
			    gres->hr_timeout) < 0) {
				pjdlog_errno(LOG_WARNING,
				    "Unable to set connection timeout");
			}
		}
	} else if (modified != 0) {
		/*
		 * Remote address and/or replication mode changed: drop the
		 * remote connections.  When nothing was modified there is no
		 * reason to disturb a working connection, so this branch is
		 * skipped.
		 */
		for (ii = 0; ii < ncomps; ii++) {
			if (!ISREMOTE(ii))
				continue;
			remote_close(gres, ii);
		}
		if (modified & MODIFIED_REMOTEADDR) {
			strlcpy(gres->hr_remoteaddr, res->hr_remoteaddr,
			    sizeof(gres->hr_remoteaddr));
		}
	}
#undef	MODIFIED_REMOTEADDR
#undef	MODIFIED_REPLICATION
#undef	MODIFIED_TIMEOUT

	/*
	 * Everything we need was copied into gres; release the parsed
	 * configuration (res points into it and must not be used afterwards).
	 */
	yy_config_free(newcfg);
	pjdlog_info("Configuration reloaded successfully.");
	return;
failed:
	if (newcfg != NULL) {
		if (newcfg->hc_controlconn != NULL)
			proto_close(newcfg->hc_controlconn);
		if (newcfg->hc_listenconn != NULL)
			proto_close(newcfg->hc_listenconn);
		yy_config_free(newcfg);
	}
	pjdlog_warning("Configuration not reloaded.");
}
/*
* Thread guards remote connections and reconnects when needed, handles
* signals, etc.
@ -1739,14 +1855,16 @@ guard_thread(void *arg)
int timeout;
ncomps = HAST_NCOMPONENTS;
/* The is only one remote component for now. */
#define ISREMOTE(no) ((no) == 1)
for (;;) {
if (sigexit_received) {
primary_exitx(EX_OK,
"Termination signal received, exiting.");
}
if (sighup_received) {
sighup_received = false;
config_reload();
}
/*
* If all the connection will be fine, we will sleep until
* someone wakes us up.
@ -1810,7 +1928,6 @@ guard_thread(void *arg)
(void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);
mtx_unlock(&hio_guard_lock);
}
#undef ISREMOTE
/* NOTREACHED */
return (NULL);
}