diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index 6d5ef04e2ab5..31fc3bf22cc4 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009-2010 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -57,13 +58,13 @@ __FBSDID("$FreeBSD$");
 #include "subr.h"
 
 /* Path to configuration file. */
-static const char *cfgpath = HAST_CONFIG;
+const char *cfgpath = HAST_CONFIG;
 /* Hastd configuration. */
 static struct hastd_config *cfg;
 /* Was SIGCHLD signal received? */
 static bool sigchld_received = false;
 /* Was SIGHUP signal received? */
-static bool sighup_received = false;
+bool sighup_received = false;
 /* Was SIGINT or SIGTERM signal received? */
 bool sigexit_received = false;
 /* PID file handle. */
@@ -169,12 +170,217 @@ child_exit(void)
 	}
 }
 
+/*
+ * Return true if differences between the current (res0) and new (res1)
+ * configuration of the same resource require a full resource restart.
+ */
+static bool
+resource_needs_restart(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+
+	if (strcmp(res0->hr_provname, res1->hr_provname) != 0)
+		return (true);
+	if (strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
+		return (true);
+	if (res0->hr_role == HAST_ROLE_INIT ||
+	    res0->hr_role == HAST_ROLE_SECONDARY) {
+		if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
+			return (true);
+		if (res0->hr_replication != res1->hr_replication)
+			return (true);
+		if (res0->hr_timeout != res1->hr_timeout)
+			return (true);
+	}
+	return (false);
+}
+
+/*
+ * Return true if the resource acts as PRIMARY and only settings that the
+ * worker can pick up on SIGHUP (remote address, replication mode, timeout)
+ * differ between the current (res0) and new (res1) configuration.
+ */
+static bool
+resource_needs_reload(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
+	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
+
+	if (res0->hr_role != HAST_ROLE_PRIMARY)
+		return (false);
+
+	if (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0)
+		return (true);
+	if (res0->hr_replication != res1->hr_replication)
+		return (true);
+	if (res0->hr_timeout != res1->hr_timeout)
+		return (true);
+	return (false);
+}
+
+/*
+ * Re-read the configuration file and apply the changes to the running
+ * daemon: rebind control/listen sockets and remove, add, restart or
+ * reload resources as needed.
+ */
 static void
 hastd_reload(void)
 {
+	struct hastd_config *newcfg;
+	struct hast_resource *nres, *cres, *tres;
+	uint8_t role;
 
-	/* TODO */
-	pjdlog_warning("Configuration reload is not implemented.");
+	pjdlog_info("Reloading configuration...");
+
+	newcfg = yy_config_parse(cfgpath, false);
+	if (newcfg == NULL)
+		goto failed;
+
+	/*
+	 * Check if control address has changed.
+	 */
+	if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
+		if (proto_server(newcfg->hc_controladdr,
+		    &newcfg->hc_controlconn) < 0) {
+			pjdlog_errno(LOG_ERR,
+			    "Unable to listen on control address %s",
+			    newcfg->hc_controladdr);
+			goto failed;
+		}
+	}
+	/*
+	 * Check if listen address has changed.
+	 */
+	if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
+		if (proto_server(newcfg->hc_listenaddr,
+		    &newcfg->hc_listenconn) < 0) {
+			pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
+			    newcfg->hc_listenaddr);
+			goto failed;
+		}
+	}
+	/*
+	 * Only when both control and listen sockets are successfully
+	 * initialized switch them to new configuration.
+	 */
+	if (newcfg->hc_controlconn != NULL) {
+		pjdlog_info("Control socket changed from %s to %s.",
+		    cfg->hc_controladdr, newcfg->hc_controladdr);
+		proto_close(cfg->hc_controlconn);
+		cfg->hc_controlconn = newcfg->hc_controlconn;
+		newcfg->hc_controlconn = NULL;
+		strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
+		    sizeof(cfg->hc_controladdr));
+	}
+	if (newcfg->hc_listenconn != NULL) {
+		pjdlog_info("Listen socket changed from %s to %s.",
+		    cfg->hc_listenaddr, newcfg->hc_listenaddr);
+		proto_close(cfg->hc_listenconn);
+		cfg->hc_listenconn = newcfg->hc_listenconn;
+		newcfg->hc_listenconn = NULL;
+		strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
+		    sizeof(cfg->hc_listenaddr));
+	}
+
+	/*
+	 * Stop and remove resources that were removed from the configuration.
+	 */
+	TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
+		TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
+			if (strcmp(cres->hr_name, nres->hr_name) == 0)
+				break;
+		}
+		if (nres == NULL) {
+			control_set_role(cres, HAST_ROLE_INIT);
+			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+			pjdlog_info("Resource %s removed.", cres->hr_name);
+			free(cres);
+		}
+	}
+	/*
+	 * Move new resources to the current configuration.
+	 */
+	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+			if (strcmp(cres->hr_name, nres->hr_name) == 0)
+				break;
+		}
+		if (cres == NULL) {
+			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+			pjdlog_info("Resource %s added.", nres->hr_name);
+		}
+	}
+	/*
+	 * Deal with modified resources.
+	 * Depending on what has changed exactly we might want to perform
+	 * different actions.
+	 *
+	 * We do full resource restart in the following situations:
+	 * Resource role is INIT or SECONDARY.
+	 * Resource role is PRIMARY and path to local component or provider
+	 * name has changed.
+	 * In case of PRIMARY, the worker process will be killed and restarted,
+	 * which also means removing /dev/hast/ provider and
+	 * recreating it.
+	 *
+	 * We do just reload (send SIGHUP to worker process) if we act as
+	 * PRIMARY, but only remote address, replication mode and timeout
+	 * has changed. For those, there is no need to restart worker process.
+	 * If PRIMARY receives SIGHUP, it will reconnect if remote address or
+	 * replication mode has changed or simply set new timeout if only
+	 * timeout has changed.
+	 */
+	TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+		TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+			if (strcmp(cres->hr_name, nres->hr_name) == 0)
+				break;
+		}
+		assert(cres != NULL);
+		if (resource_needs_restart(cres, nres)) {
+			pjdlog_info("Resource %s configuration was modified, restarting it.",
+			    cres->hr_name);
+			role = cres->hr_role;
+			control_set_role(cres, HAST_ROLE_INIT);
+			TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+			free(cres);
+			TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+			TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+			control_set_role(nres, role);
+		} else if (resource_needs_reload(cres, nres)) {
+			pjdlog_info("Resource %s configuration was modified, reloading it.",
+			    cres->hr_name);
+			strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
+			    sizeof(cres->hr_remoteaddr));
+			cres->hr_replication = nres->hr_replication;
+			cres->hr_timeout = nres->hr_timeout;
+			if (cres->hr_workerpid != 0) {
+				if (kill(cres->hr_workerpid, SIGHUP) < 0) {
+					pjdlog_errno(LOG_WARNING,
+					    "Unable to send SIGHUP to worker process %u",
+					    (unsigned int)cres->hr_workerpid);
+				}
+			}
+		}
+	}
+
+	yy_config_free(newcfg);
+	pjdlog_info("Configuration reloaded successfully.");
+	return;
+failed:
+	if (newcfg != NULL) {
+		if (newcfg->hc_controlconn != NULL)
+			proto_close(newcfg->hc_controlconn);
+		if (newcfg->hc_listenconn != NULL)
+			proto_close(newcfg->hc_listenconn);
+		yy_config_free(newcfg);
+	}
+	pjdlog_warning("Configuration not reloaded.");
 }
 
 static void
@@ -402,10 +608,6 @@ main_loop(void)
 	fd_set rfds, wfds;
 	int cfd, lfd, maxfd, ret;
 
-	cfd = proto_descriptor(cfg->hc_controlconn);
-	lfd = proto_descriptor(cfg->hc_listenconn);
-	maxfd = cfd > lfd ? cfd : lfd;
-
 	for (;;) {
 		if (sigchld_received) {
 			sigchld_received = false;
@@ -416,6 +618,10 @@ main_loop(void)
 			hastd_reload();
 		}
 
+		cfd = proto_descriptor(cfg->hc_controlconn);
+		lfd = proto_descriptor(cfg->hc_listenconn);
+		maxfd = cfd > lfd ? cfd : lfd;
+
 		/* Setup descriptors for select(2). */
 		FD_ZERO(&rfds);
 		FD_SET(cfd, &rfds);
diff --git a/sbin/hastd/hastd.h b/sbin/hastd/hastd.h
index 199de8c94d5c..12b384de6ed9 100644
--- a/sbin/hastd/hastd.h
+++ b/sbin/hastd/hastd.h
@@ -39,7 +39,8 @@
 
 #include "hast.h"
 
-extern bool sigexit_received;
+extern const char *cfgpath;
+extern bool sigexit_received, sighup_received;
 extern struct pidfh *pfh;
 
 void	hastd_primary(struct hast_resource *res);
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index faaa136de078..a5e5d6e78bef 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -1,5 +1,6 @@
 /*-
  * Copyright (c) 2009 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek
  * All rights reserved.
  *
  * This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -65,6 +66,9 @@ __FBSDID("$FreeBSD$");
 #include "subr.h"
 #include "synch.h"
 
+/* There is only one remote component for now. */
+#define	ISREMOTE(no)	((no) == 1)
+
 struct hio {
 	/*
 	 * Number of components we are still waiting for.
@@ -424,6 +428,7 @@ init_environment(struct hast_resource *res __unused)
 	 */
 	signal(SIGINT, sighandler);
 	signal(SIGTERM, sighandler);
+	signal(SIGHUP, sighandler);
 }
 
 static void
@@ -1713,6 +1718,9 @@ sighandler(int sig)
 	case SIGTERM:
 		sigexit_received = true;
 		break;
+	case SIGHUP:
+		sighup_received = true;
+		break;
 	default:
 		assert(!"invalid condition");
 	}
@@ -1726,6 +1734,120 @@ sighandler(int sig)
 	mtx_unlock(&hio_guard_lock);
 }
 
+/*
+ * Re-read the configuration file in the worker process and apply changes
+ * to the remote address, replication mode and timeout of our resource.
+ */
+static void
+config_reload(void)
+{
+	struct hastd_config *newcfg;
+	struct hast_resource *res;
+	unsigned int ii, ncomps;
+	int modified;
+
+	pjdlog_info("Reloading configuration...");
+
+	ncomps = HAST_NCOMPONENTS;
+
+	newcfg = yy_config_parse(cfgpath, false);
+	if (newcfg == NULL)
+		goto failed;
+
+	TAILQ_FOREACH(res, &newcfg->hc_resources, hr_next) {
+		if (strcmp(res->hr_name, gres->hr_name) == 0)
+			break;
+	}
+	/*
+	 * If resource was removed from the configuration file, resource
+	 * name, provider name or path to local component was modified we
+	 * shouldn't be here. This means that someone modified configuration
+	 * file and sent SIGHUP to us instead of main hastd process.
+	 * Log advice and ignore the signal.
+	 */
+	if (res == NULL || strcmp(gres->hr_name, res->hr_name) != 0 ||
+	    strcmp(gres->hr_provname, res->hr_provname) != 0 ||
+	    strcmp(gres->hr_localpath, res->hr_localpath) != 0) {
+		pjdlog_warning("To reload configuration send SIGHUP to the main hastd process (pid %u).",
+		    (unsigned int)getppid());
+		goto failed;
+	}
+
+#define	MODIFIED_REMOTEADDR	0x1
+#define	MODIFIED_REPLICATION	0x2
+#define	MODIFIED_TIMEOUT	0x4
+	modified = 0;
+	if (strcmp(gres->hr_remoteaddr, res->hr_remoteaddr) != 0) {
+		/*
+		 * Don't copy res->hr_remoteaddr to gres just yet.
+		 * We want remote_close() to log disconnect from the old
+		 * addresses, not from the new ones.
+		 */
+		modified |= MODIFIED_REMOTEADDR;
+	}
+	if (gres->hr_replication != res->hr_replication) {
+		gres->hr_replication = res->hr_replication;
+		modified |= MODIFIED_REPLICATION;
+	}
+	if (gres->hr_timeout != res->hr_timeout) {
+		gres->hr_timeout = res->hr_timeout;
+		modified |= MODIFIED_TIMEOUT;
+	}
+	/*
+	 * If only timeout was modified we only need to change it without
+	 * reconnecting.
+	 */
+	if (modified == MODIFIED_TIMEOUT) {
+		for (ii = 0; ii < ncomps; ii++) {
+			if (!ISREMOTE(ii))
+				continue;
+			rw_rlock(&hio_remote_lock[ii]);
+			if (!ISCONNECTED(gres, ii)) {
+				rw_unlock(&hio_remote_lock[ii]);
+				continue;
+			}
+			rw_unlock(&hio_remote_lock[ii]);
+			if (proto_timeout(gres->hr_remotein,
+			    gres->hr_timeout) < 0) {
+				pjdlog_errno(LOG_WARNING,
+				    "Unable to set connection timeout");
+			}
+			if (proto_timeout(gres->hr_remoteout,
+			    gres->hr_timeout) < 0) {
+				pjdlog_errno(LOG_WARNING,
+				    "Unable to set connection timeout");
+			}
+		}
+	} else {
+		for (ii = 0; ii < ncomps; ii++) {
+			if (!ISREMOTE(ii))
+				continue;
+			remote_close(gres, ii);
+		}
+		if (modified & MODIFIED_REMOTEADDR) {
+			strlcpy(gres->hr_remoteaddr, res->hr_remoteaddr,
+			    sizeof(gres->hr_remoteaddr));
+		}
+	}
+#undef	MODIFIED_REMOTEADDR
+#undef	MODIFIED_REPLICATION
+#undef	MODIFIED_TIMEOUT
+
+	/* Everything we need was copied into gres; don't leak the parse. */
+	yy_config_free(newcfg);
+	pjdlog_info("Configuration reloaded successfully.");
+	return;
+failed:
+	if (newcfg != NULL) {
+		if (newcfg->hc_controlconn != NULL)
+			proto_close(newcfg->hc_controlconn);
+		if (newcfg->hc_listenconn != NULL)
+			proto_close(newcfg->hc_listenconn);
+		yy_config_free(newcfg);
+	}
+	pjdlog_warning("Configuration not reloaded.");
+}
+
 /*
  * Thread guards remote connections and reconnects when needed, handles
  * signals, etc.
@@ -1739,14 +1861,16 @@ guard_thread(void *arg)
 	int timeout;
 
 	ncomps = HAST_NCOMPONENTS;
-	/* The is only one remote component for now. */
-#define	ISREMOTE(no)	((no) == 1)
 
 	for (;;) {
 		if (sigexit_received) {
 			primary_exitx(EX_OK,
 			    "Termination signal received, exiting.");
 		}
+		if (sighup_received) {
+			sighup_received = false;
+			config_reload();
+		}
 		/*
 		 * If all the connection will be fine, we will sleep until
 		 * someone wakes us up.
@@ -1810,7 +1934,6 @@ guard_thread(void *arg)
 		(void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);
 		mtx_unlock(&hio_guard_lock);
 	}
-#undef	ISREMOTE
 	/* NOTREACHED */
 	return (NULL);
 }