diff --git a/sbin/hastd/hast.conf.5 b/sbin/hastd/hast.conf.5 index f53e6e3a69e5..6410ae735933 100644 --- a/sbin/hastd/hast.conf.5 +++ b/sbin/hastd/hast.conf.5 @@ -63,6 +63,7 @@ checksum compression timeout exec +metaflush "on" | "off" on { # Node section @@ -85,12 +86,14 @@ resource { local timeout exec + metaflush "on" | "off" on { # Resource-node section name # Required local + metaflush "on" | "off" # Required remote source @@ -100,6 +103,7 @@ resource { name # Required local + metaflush "on" | "off" # Required remote source @@ -318,6 +322,25 @@ It can be one of: .Ar secondary , .Ar primary . .Pp +.It Ic metaflush on | off +.Pp +When set to +.Va on , +flush write cache of the local provider after every metadata (activemap) update. +Flushing write cache ensures that provider will not reorder writes and that +metadata will be properly updated before real data is stored. +If the local provider does not support flushing write cache (it returns +.Er EOPNOTSUPP +on the +.Cm BIO_FLUSH +request), +.Nm hastd +will disable +.Ic metaflush +automatically. +The default value is +.Va on . +.Pp .It Ic name Aq name .Pp GEOM provider name that will appear as diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h index a62b63a263f6..7bfef4c248f3 100644 --- a/sbin/hastd/hast.h +++ b/sbin/hastd/hast.h @@ -167,6 +167,8 @@ struct hast_resource { off_t hr_local_mediasize; /* Sector size of local provider. */ unsigned int hr_local_sectorsize; + /* Flush write cache on metadata updates? */ + int hr_metaflush; /* Descriptor for /dev/ggctl communication. */ int hr_ggatefd; diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c index 6518f0cf57fd..d21f7f69f3fe 100644 --- a/sbin/hastd/hastd.c +++ b/sbin/hastd/hastd.c @@ -386,6 +386,12 @@ resource_needs_restart(const struct hast_resource *res0, return (true); if (strcmp(res0->hr_exec, res1->hr_exec) != 0) return (true); + /* + * When metaflush has changed we don't really need restart, + * but it is just easier this way. + */ + if (res0->hr_metaflush != res1->hr_metaflush) + return (true); } return (false); } @@ -416,6 +422,8 @@ resource_needs_reload(const struct hast_resource *res0, return (true); if (strcmp(res0->hr_exec, res1->hr_exec) != 0) return (true); + if (res0->hr_metaflush != res1->hr_metaflush) + return (true); return (false); } @@ -436,6 +444,7 @@ resource_reload(const struct hast_resource *res) nv_add_int32(nvout, (int32_t)res->hr_compression, "compression"); nv_add_int32(nvout, (int32_t)res->hr_timeout, "timeout"); nv_add_string(nvout, res->hr_exec, "exec"); + nv_add_int32(nvout, (int32_t)res->hr_metaflush, "metaflush"); if (nv_error(nvout) != 0) { nv_free(nvout); pjdlog_error("Unable to allocate header for reload message."); @@ -591,12 +600,13 @@ hastd_reload(void) * recreating it. * * We do just reload (send SIGHUP to worker process) if we act as - * PRIMARY, but only if remote address, replication mode, timeout or - * execution path has changed. For those, there is no need to restart - * worker process. + * PRIMARY, but only if remote address, source address, replication + * mode, timeout, execution path or metaflush has changed. + * For those, there is no need to restart worker process. * If PRIMARY receives SIGHUP, it will reconnect if remote address or - * replication mode has changed or simply set new timeout if only - * timeout has changed. + * source address has changed or it will set new timeout if only timeout + * has changed or it will update metaflush if only metaflush has + * changed. */ TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) { TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) { @@ -627,6 +637,7 @@ hastd_reload(void) cres->hr_timeout = nres->hr_timeout; strlcpy(cres->hr_exec, nres->hr_exec, sizeof(cres->hr_exec)); + cres->hr_metaflush = nres->hr_metaflush; if (cres->hr_workerpid != 0) resource_reload(cres); } diff --git a/sbin/hastd/parse.y b/sbin/hastd/parse.y index 01e8593a0d8e..e548a858374b 100644 --- a/sbin/hastd/parse.y +++ b/sbin/hastd/parse.y @@ -68,9 +68,11 @@ static int depth0_checksum; static int depth0_compression; static int depth0_timeout; static char depth0_exec[PATH_MAX]; +static int depth0_metaflush; static char depth1_provname[PATH_MAX]; static char depth1_localpath[PATH_MAX]; +static int depth1_metaflush; extern void yyrestart(FILE *); @@ -197,6 +199,7 @@ yy_config_parse(const char *config, bool exitonerror) strlcpy(depth0_listen_tcp6, HASTD_LISTEN_TCP6, sizeof(depth0_listen_tcp6)); depth0_exec[0] = '\0'; + depth0_metaflush = 1; lconfig = calloc(1, sizeof(*lconfig)); if (lconfig == NULL) { @@ -328,6 +331,13 @@ yy_config_parse(const char *config, bool exitonerror) strlcpy(curres->hr_exec, depth0_exec, sizeof(curres->hr_exec)); } + if (curres->hr_metaflush == -1) { + /* + * Metaflush is not set at resource-level. + * Use global or default setting. + */ + curres->hr_metaflush = depth0_metaflush; + } } return (lconfig); @@ -355,8 +365,8 @@ yy_config_free(struct hastd_config *config) } %} -%token CONTROL LISTEN PORT REPLICATION CHECKSUM COMPRESSION -%token TIMEOUT EXEC EXTENTSIZE RESOURCE NAME LOCAL REMOTE SOURCE ON +%token CONTROL LISTEN PORT REPLICATION CHECKSUM COMPRESSION METAFLUSH +%token TIMEOUT EXEC EXTENTSIZE RESOURCE NAME LOCAL REMOTE SOURCE ON OFF %token FULLSYNC MEMSYNC ASYNC NONE CRC32 SHA256 HOLE LZF %token NUM STR OB CB @@ -364,6 +374,7 @@ yy_config_free(struct hastd_config *config) %type replication_type %type checksum_type %type compression_type +%type boolean %union { @@ -396,6 +407,8 @@ statement: | exec_statement | + metaflush_statement + | node_statement | resource_statement @@ -585,6 +598,34 @@ exec_statement: EXEC STR } ; +metaflush_statement: METAFLUSH boolean + { + switch (depth) { + case 0: + depth0_metaflush = $2; + break; + case 1: + PJDLOG_ASSERT(curres != NULL); + depth1_metaflush = $2; + break; + case 2: + if (!mynode) + break; + PJDLOG_ASSERT(curres != NULL); + curres->hr_metaflush = $2; + break; + default: + PJDLOG_ABORT("metaflush at wrong depth level"); + } + } + ; + +boolean: + ON { $$ = 1; } + | + OFF { $$ = 0; } + ; + node_statement: ON node_start OB node_entries CB { mynode = false; @@ -660,6 +701,13 @@ resource_statement: RESOURCE resource_start OB resource_entries CB strlcpy(curres->hr_localpath, depth1_localpath, sizeof(curres->hr_localpath)); } + if (curres->hr_metaflush == -1 && depth1_metaflush != -1) { + /* + * Metaflush is not set at node-level, + * but is set at resource-level, use it. + */ + curres->hr_metaflush = depth1_metaflush; + } /* * If provider name is not given, use resource name @@ -713,6 +761,7 @@ resource_start: STR */ depth1_provname[0] = '\0'; depth1_localpath[0] = '\0'; + depth1_metaflush = -1; hadmynode = false; curres = calloc(1, sizeof(*curres)); @@ -739,6 +788,7 @@ resource_start: STR curres->hr_provname[0] = '\0'; curres->hr_localpath[0] = '\0'; curres->hr_localfd = -1; + curres->hr_metaflush = -1; curres->hr_remoteaddr[0] = '\0'; curres->hr_sourceaddr[0] = '\0'; curres->hr_ggateunit = -1; @@ -761,6 +811,8 @@ resource_entry: | exec_statement | + metaflush_statement + | name_statement | local_statement @@ -869,6 +921,8 @@ resource_node_entry: remote_statement | source_statement + | + metaflush_statement ; remote_statement: REMOTE remote_str diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c index ebb758ec9e00..08c3329b98e6 100644 --- a/sbin/hastd/primary.c +++ b/sbin/hastd/primary.c @@ -296,6 +296,17 @@ hast_activemap_flush(struct hast_resource *res) pjdlog_errno(LOG_ERR, "Unable to flush activemap to disk"); return (-1); } + if (res->hr_metaflush == 1 && g_flush(res->hr_localfd) == -1) { + if (errno == EOPNOTSUPP) { + pjdlog_warning("The %s provider doesn't support flushing write cache. Disabling it.", + res->hr_localpath); + res->hr_metaflush = 0; + } else { + pjdlog_errno(LOG_ERR, + "Unable to flush disk cache on activemap update"); + return (-1); + } + } return (0); } @@ -1999,6 +2010,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv) nv_assert(nv, "compression"); nv_assert(nv, "timeout"); nv_assert(nv, "exec"); + nv_assert(nv, "metaflush"); ncomps = HAST_NCOMPONENTS; @@ -2009,6 +2021,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv) #define MODIFIED_COMPRESSION 0x10 #define MODIFIED_TIMEOUT 0x20 #define MODIFIED_EXEC 0x40 +#define MODIFIED_METAFLUSH 0x80 modified = 0; vstr = nv_get_string(nv, "remoteaddr"); @@ -2050,6 +2063,11 @@ primary_config_reload(struct hast_resource *res, struct nv *nv) strlcpy(gres->hr_exec, vstr, sizeof(gres->hr_exec)); modified |= MODIFIED_EXEC; } + vint = nv_get_int32(nv, "metaflush"); + if (gres->hr_metaflush != vint) { + gres->hr_metaflush = vint; + modified |= MODIFIED_METAFLUSH; + } /* * Change timeout for connected sockets. @@ -2099,6 +2117,7 @@ primary_config_reload(struct hast_resource *res, struct nv *nv) #undef MODIFIED_COMPRESSION #undef MODIFIED_TIMEOUT #undef MODIFIED_EXEC +#undef MODIFIED_METAFLUSH pjdlog_info("Configuration reloaded successfully."); } diff --git a/sbin/hastd/token.l b/sbin/hastd/token.l index 67c1e130e69d..3a868d7f1e48 100644 --- a/sbin/hastd/token.l +++ b/sbin/hastd/token.l @@ -53,12 +53,14 @@ checksum { DP; return CHECKSUM; } compression { DP; return COMPRESSION; } timeout { DP; return TIMEOUT; } exec { DP; return EXEC; } +metaflush { DP; return METAFLUSH; } resource { DP; return RESOURCE; } name { DP; return NAME; } local { DP; return LOCAL; } remote { DP; return REMOTE; } source { DP; return SOURCE; } on { DP; return ON; } +off { DP; return OFF; } fullsync { DP; return FULLSYNC; } memsync { DP; return MEMSYNC; } async { DP; return ASYNC; }