Enable offlining of log devices.
OpenSolaris revision and Bug IDs:

9701:cc5b64682e64
6803605 should be able to offline log devices
6726045 vdev_deflate_ratio is not set when offlining a log device
6599442 zpool import has faults in the display

Approved by: delphij (mentor)
Obtained from: OpenSolaris (Bug ID 6803605, 6726045, 6599442)
MFC after: 3 weeks
This commit is contained in:
parent
485fc5eb4e
commit
96a1a6a568
@ -980,14 +980,189 @@ max_width(zpool_handle_t *zhp, nvlist_t *nv, int depth, int max)
|
||||
return (max);
|
||||
}
|
||||
|
||||
typedef struct spare_cbdata {
|
||||
uint64_t cb_guid;
|
||||
zpool_handle_t *cb_zhp;
|
||||
} spare_cbdata_t;
|
||||
|
||||
static boolean_t
|
||||
find_vdev(nvlist_t *nv, uint64_t search)
|
||||
{
|
||||
uint64_t guid;
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
|
||||
search == guid)
|
||||
return (B_TRUE);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++)
|
||||
if (find_vdev(child[c], search))
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static int
|
||||
find_spare(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
spare_cbdata_t *cbp = data;
|
||||
nvlist_t *config, *nvroot;
|
||||
|
||||
config = zpool_get_config(zhp, NULL);
|
||||
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvroot) == 0);
|
||||
|
||||
if (find_vdev(nvroot, cbp->cb_guid)) {
|
||||
cbp->cb_zhp = zhp;
|
||||
return (1);
|
||||
}
|
||||
|
||||
zpool_close(zhp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out configuration state as requested by status_callback.
|
||||
*/
|
||||
void
|
||||
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
||||
int namewidth, int depth, boolean_t isspare)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
vdev_stat_t *vs;
|
||||
char rbuf[6], wbuf[6], cbuf[6], repaired[7];
|
||||
char *vname;
|
||||
uint64_t notpresent;
|
||||
spare_cbdata_t cb;
|
||||
char *state;
|
||||
|
||||
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
|
||||
(uint64_t **)&vs, &c) == 0);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
|
||||
if (isspare) {
|
||||
/*
|
||||
* For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
|
||||
* online drives.
|
||||
*/
|
||||
if (vs->vs_aux == VDEV_AUX_SPARED)
|
||||
state = "INUSE";
|
||||
else if (vs->vs_state == VDEV_STATE_HEALTHY)
|
||||
state = "AVAIL";
|
||||
}
|
||||
|
||||
(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
|
||||
name, state);
|
||||
|
||||
if (!isspare) {
|
||||
zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
|
||||
zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
|
||||
zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
|
||||
(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
&notpresent) == 0) {
|
||||
char *path;
|
||||
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
|
||||
(void) printf(" was %s", path);
|
||||
} else if (vs->vs_aux != 0) {
|
||||
(void) printf(" ");
|
||||
|
||||
switch (vs->vs_aux) {
|
||||
case VDEV_AUX_OPEN_FAILED:
|
||||
(void) printf(gettext("cannot open"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_GUID_SUM:
|
||||
(void) printf(gettext("missing device"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_NO_REPLICAS:
|
||||
(void) printf(gettext("insufficient replicas"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_VERSION_NEWER:
|
||||
(void) printf(gettext("newer version"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_SPARED:
|
||||
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
|
||||
&cb.cb_guid) == 0);
|
||||
if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
|
||||
if (strcmp(zpool_get_name(cb.cb_zhp),
|
||||
zpool_get_name(zhp)) == 0)
|
||||
(void) printf(gettext("currently in "
|
||||
"use"));
|
||||
else
|
||||
(void) printf(gettext("in use by "
|
||||
"pool '%s'"),
|
||||
zpool_get_name(cb.cb_zhp));
|
||||
zpool_close(cb.cb_zhp);
|
||||
} else {
|
||||
(void) printf(gettext("currently in use"));
|
||||
}
|
||||
break;
|
||||
|
||||
case VDEV_AUX_ERR_EXCEEDED:
|
||||
(void) printf(gettext("too many errors"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_IO_FAILURE:
|
||||
(void) printf(gettext("experienced I/O failures"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_LOG:
|
||||
(void) printf(gettext("bad intent log"));
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) printf(gettext("corrupted data"));
|
||||
break;
|
||||
}
|
||||
} else if (vs->vs_scrub_repaired != 0 && children == 0) {
|
||||
/*
|
||||
* Report bytes resilvered/repaired on leaf devices.
|
||||
*/
|
||||
zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
|
||||
(void) printf(gettext(" %s %s"), repaired,
|
||||
(vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
|
||||
"resilvered" : "repaired");
|
||||
}
|
||||
|
||||
(void) printf("\n");
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
|
||||
/* Don't print logs here */
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if (is_log)
|
||||
continue;
|
||||
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
print_status_config(zhp, vname, child[c],
|
||||
namewidth, depth + 2, isspare);
|
||||
free(vname);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Print the configuration of an exported pool. Iterate over all vdevs in the
|
||||
* pool, printing out the name and status for each one.
|
||||
*/
|
||||
void
|
||||
print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
|
||||
boolean_t print_logs)
|
||||
print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
@ -1044,12 +1219,11 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
|
||||
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if ((is_log && !print_logs) || (!is_log && print_logs))
|
||||
if (is_log)
|
||||
continue;
|
||||
|
||||
vname = zpool_vdev_name(g_zfs, NULL, child[c]);
|
||||
print_import_config(vname, child[c],
|
||||
namewidth, depth + 2, B_FALSE);
|
||||
print_import_config(vname, child[c], namewidth, depth + 2);
|
||||
free(vname);
|
||||
}
|
||||
|
||||
@ -1074,6 +1248,43 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print log vdevs.
|
||||
* Logs are recorded as top level vdevs in the main pool child array
|
||||
* but with "is_log" set to 1. We use either print_status_config() or
|
||||
* print_import_config() to print the top level logs then any log
|
||||
* children (eg mirrored slogs) are printed recursively - which
|
||||
* works because only the top level vdev is marked "is_log"
|
||||
*/
|
||||
static void
|
||||
print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth, boolean_t verbose)
|
||||
{
|
||||
uint_t c, children;
|
||||
nvlist_t **child;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
|
||||
&children) != 0)
|
||||
return;
|
||||
|
||||
(void) printf(gettext("\tlogs\n"));
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
char *name;
|
||||
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if (!is_log)
|
||||
continue;
|
||||
name = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
if (verbose)
|
||||
print_status_config(zhp, name, child[c], namewidth,
|
||||
2, B_FALSE);
|
||||
else
|
||||
print_import_config(name, child[c], namewidth, 2);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Display the status for the given pool.
|
||||
*/
|
||||
@ -1242,11 +1453,9 @@ show_import(nvlist_t *config)
|
||||
if (namewidth < 10)
|
||||
namewidth = 10;
|
||||
|
||||
print_import_config(name, nvroot, namewidth, 0, B_FALSE);
|
||||
if (num_logs(nvroot) > 0) {
|
||||
(void) printf(gettext("\tlogs\n"));
|
||||
print_import_config(name, nvroot, namewidth, 0, B_TRUE);
|
||||
}
|
||||
print_import_config(name, nvroot, namewidth, 0);
|
||||
if (num_logs(nvroot) > 0)
|
||||
print_logs(NULL, nvroot, namewidth, B_FALSE);
|
||||
|
||||
if (reason == ZPOOL_STATUS_BAD_GUID_SUM) {
|
||||
(void) printf(gettext("\n\tAdditional devices are known to "
|
||||
@ -2717,182 +2926,6 @@ print_scrub_status(nvlist_t *nvroot)
|
||||
(u_longlong_t)(minutes_left / 60), (uint_t)(minutes_left % 60));
|
||||
}
|
||||
|
||||
typedef struct spare_cbdata {
|
||||
uint64_t cb_guid;
|
||||
zpool_handle_t *cb_zhp;
|
||||
} spare_cbdata_t;
|
||||
|
||||
static boolean_t
|
||||
find_vdev(nvlist_t *nv, uint64_t search)
|
||||
{
|
||||
uint64_t guid;
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID, &guid) == 0 &&
|
||||
search == guid)
|
||||
return (B_TRUE);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++)
|
||||
if (find_vdev(child[c], search))
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static int
|
||||
find_spare(zpool_handle_t *zhp, void *data)
|
||||
{
|
||||
spare_cbdata_t *cbp = data;
|
||||
nvlist_t *config, *nvroot;
|
||||
|
||||
config = zpool_get_config(zhp, NULL);
|
||||
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
||||
&nvroot) == 0);
|
||||
|
||||
if (find_vdev(nvroot, cbp->cb_guid)) {
|
||||
cbp->cb_zhp = zhp;
|
||||
return (1);
|
||||
}
|
||||
|
||||
zpool_close(zhp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Print out configuration state as requested by status_callback.
|
||||
*/
|
||||
void
|
||||
print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
||||
int namewidth, int depth, boolean_t isspare)
|
||||
{
|
||||
nvlist_t **child;
|
||||
uint_t c, children;
|
||||
vdev_stat_t *vs;
|
||||
char rbuf[6], wbuf[6], cbuf[6], repaired[7];
|
||||
char *vname;
|
||||
uint64_t notpresent;
|
||||
spare_cbdata_t cb;
|
||||
char *state;
|
||||
|
||||
verify(nvlist_lookup_uint64_array(nv, ZPOOL_CONFIG_STATS,
|
||||
(uint64_t **)&vs, &c) == 0);
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0)
|
||||
children = 0;
|
||||
|
||||
state = zpool_state_to_name(vs->vs_state, vs->vs_aux);
|
||||
if (isspare) {
|
||||
/*
|
||||
* For hot spares, we use the terms 'INUSE' and 'AVAILABLE' for
|
||||
* online drives.
|
||||
*/
|
||||
if (vs->vs_aux == VDEV_AUX_SPARED)
|
||||
state = "INUSE";
|
||||
else if (vs->vs_state == VDEV_STATE_HEALTHY)
|
||||
state = "AVAIL";
|
||||
}
|
||||
|
||||
(void) printf("\t%*s%-*s %-8s", depth, "", namewidth - depth,
|
||||
name, state);
|
||||
|
||||
if (!isspare) {
|
||||
zfs_nicenum(vs->vs_read_errors, rbuf, sizeof (rbuf));
|
||||
zfs_nicenum(vs->vs_write_errors, wbuf, sizeof (wbuf));
|
||||
zfs_nicenum(vs->vs_checksum_errors, cbuf, sizeof (cbuf));
|
||||
(void) printf(" %5s %5s %5s", rbuf, wbuf, cbuf);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_NOT_PRESENT,
|
||||
&notpresent) == 0) {
|
||||
char *path;
|
||||
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path) == 0);
|
||||
(void) printf(" was %s", path);
|
||||
} else if (vs->vs_aux != 0) {
|
||||
(void) printf(" ");
|
||||
|
||||
switch (vs->vs_aux) {
|
||||
case VDEV_AUX_OPEN_FAILED:
|
||||
(void) printf(gettext("cannot open"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_GUID_SUM:
|
||||
(void) printf(gettext("missing device"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_NO_REPLICAS:
|
||||
(void) printf(gettext("insufficient replicas"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_VERSION_NEWER:
|
||||
(void) printf(gettext("newer version"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_SPARED:
|
||||
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
|
||||
&cb.cb_guid) == 0);
|
||||
if (zpool_iter(g_zfs, find_spare, &cb) == 1) {
|
||||
if (strcmp(zpool_get_name(cb.cb_zhp),
|
||||
zpool_get_name(zhp)) == 0)
|
||||
(void) printf(gettext("currently in "
|
||||
"use"));
|
||||
else
|
||||
(void) printf(gettext("in use by "
|
||||
"pool '%s'"),
|
||||
zpool_get_name(cb.cb_zhp));
|
||||
zpool_close(cb.cb_zhp);
|
||||
} else {
|
||||
(void) printf(gettext("currently in use"));
|
||||
}
|
||||
break;
|
||||
|
||||
case VDEV_AUX_ERR_EXCEEDED:
|
||||
(void) printf(gettext("too many errors"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_IO_FAILURE:
|
||||
(void) printf(gettext("experienced I/O failures"));
|
||||
break;
|
||||
|
||||
case VDEV_AUX_BAD_LOG:
|
||||
(void) printf(gettext("bad intent log"));
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) printf(gettext("corrupted data"));
|
||||
break;
|
||||
}
|
||||
} else if (vs->vs_scrub_repaired != 0 && children == 0) {
|
||||
/*
|
||||
* Report bytes resilvered/repaired on leaf devices.
|
||||
*/
|
||||
zfs_nicenum(vs->vs_scrub_repaired, repaired, sizeof (repaired));
|
||||
(void) printf(gettext(" %s %s"), repaired,
|
||||
(vs->vs_scrub_type == POOL_SCRUB_RESILVER) ?
|
||||
"resilvered" : "repaired");
|
||||
}
|
||||
|
||||
(void) printf("\n");
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
|
||||
/* Don't print logs here */
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if (is_log)
|
||||
continue;
|
||||
vname = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
print_status_config(zhp, vname, child[c],
|
||||
namewidth, depth + 2, isspare);
|
||||
free(vname);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
print_error_log(zpool_handle_t *zhp)
|
||||
{
|
||||
@ -2968,39 +3001,6 @@ print_l2cache(zpool_handle_t *zhp, nvlist_t **l2cache, uint_t nl2cache,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Print log vdevs.
|
||||
* Logs are recorded as top level vdevs in the main pool child array but with
|
||||
* "is_log" set to 1. We use print_status_config() to print the top level logs
|
||||
* then any log children (eg mirrored slogs) are printed recursively - which
|
||||
* works because only the top level vdev is marked "is_log"
|
||||
*/
|
||||
static void
|
||||
print_logs(zpool_handle_t *zhp, nvlist_t *nv, int namewidth)
|
||||
{
|
||||
uint_t c, children;
|
||||
nvlist_t **child;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN, &child,
|
||||
&children) != 0)
|
||||
return;
|
||||
|
||||
(void) printf(gettext("\tlogs\n"));
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
uint64_t is_log = B_FALSE;
|
||||
char *name;
|
||||
|
||||
(void) nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log);
|
||||
if (!is_log)
|
||||
continue;
|
||||
name = zpool_vdev_name(g_zfs, zhp, child[c]);
|
||||
print_status_config(zhp, name, child[c], namewidth, 2, B_FALSE);
|
||||
free(name);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Display a summary of pool status. Displays a summary such as:
|
||||
*
|
||||
@ -3229,7 +3229,7 @@ status_callback(zpool_handle_t *zhp, void *data)
|
||||
namewidth, 0, B_FALSE);
|
||||
|
||||
if (num_logs(nvroot) > 0)
|
||||
print_logs(zhp, nvroot, namewidth);
|
||||
print_logs(zhp, nvroot, namewidth, B_TRUE);
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
|
||||
&l2cache, &nl2cache) == 0)
|
||||
print_l2cache(zhp, l2cache, nl2cache, namewidth);
|
||||
|
@ -116,6 +116,7 @@ enum {
|
||||
EZFS_VDEVNOTSUP, /* unsupported vdev type */
|
||||
EZFS_NOTSUP, /* ops not supported on this dataset */
|
||||
EZFS_ACTIVE_SPARE, /* pool has active shared spare devices */
|
||||
EZFS_UNPLAYED_LOGS, /* log device has unplayed logs */
|
||||
EZFS_UNKNOWN
|
||||
};
|
||||
|
||||
|
@ -1720,6 +1720,12 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid)
|
||||
*/
|
||||
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
|
||||
|
||||
case EEXIST:
|
||||
/*
|
||||
* The log device has unplayed logs
|
||||
*/
|
||||
return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
|
||||
|
||||
default:
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
}
|
||||
|
@ -210,6 +210,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
case EZFS_ACTIVE_SPARE:
|
||||
return (dgettext(TEXT_DOMAIN, "pool has active shared spare "
|
||||
"device"));
|
||||
case EZFS_UNPLAYED_LOGS:
|
||||
return (dgettext(TEXT_DOMAIN, "log device has unplayed intent "
|
||||
"logs"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
|
@ -351,7 +351,7 @@ traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg)
|
||||
return;
|
||||
|
||||
/*
|
||||
* One block ("stumpy") can be allocated a long time ago; we
|
||||
* One block ("stubby") can be allocated a long time ago; we
|
||||
* want to visit that one because it has been allocated
|
||||
* (on-disk) even if it hasn't been claimed (even though for
|
||||
* plain scrub there's nothing to do to it).
|
||||
|
@ -1109,6 +1109,33 @@ spa_check_removed(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Load the slog device state from the config object since it's possible
|
||||
* that the label does not contain the most up-to-date information.
|
||||
*/
|
||||
void
|
||||
spa_load_log_state(spa_t *spa)
|
||||
{
|
||||
nvlist_t *nv, *nvroot, **child;
|
||||
uint64_t is_log;
|
||||
uint_t children, c;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
|
||||
VERIFY(load_nvlist(spa, spa->spa_config_object, &nv) == 0);
|
||||
VERIFY(nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_VDEV_TREE, &nvroot) == 0);
|
||||
VERIFY(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0);
|
||||
|
||||
for (c = 0; c < children; c++) {
|
||||
vdev_t *tvd = rvd->vdev_child[c];
|
||||
|
||||
if (nvlist_lookup_uint64(child[c], ZPOOL_CONFIG_IS_LOG,
|
||||
&is_log) == 0 && is_log)
|
||||
vdev_load_log_state(tvd, child[c]);
|
||||
}
|
||||
nvlist_free(nv);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for missing log devices
|
||||
*/
|
||||
@ -1125,13 +1152,7 @@ spa_check_logs(spa_t *spa)
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
|
||||
case SPA_LOG_CLEAR:
|
||||
(void) dmu_objset_find(spa->spa_name, zil_clear_log_chain, NULL,
|
||||
DS_FIND_CHILDREN);
|
||||
break;
|
||||
}
|
||||
spa->spa_log_state = SPA_LOG_GOOD;
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -1455,6 +1476,8 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
}
|
||||
|
||||
spa_load_log_state(spa);
|
||||
|
||||
if (spa_check_logs(spa)) {
|
||||
vdev_set_state(rvd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_BAD_LOG);
|
||||
@ -1542,6 +1565,7 @@ spa_load(spa_t *spa, nvlist_t *config, spa_load_state_t state, int mosconfig)
|
||||
zil_claim, tx, DS_FIND_CHILDREN);
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
spa->spa_log_state = SPA_LOG_GOOD;
|
||||
spa->spa_sync_on = B_TRUE;
|
||||
txg_sync_start(spa->spa_dsl_pool);
|
||||
|
||||
|
@ -259,6 +259,7 @@ extern void vdev_remove_parent(vdev_t *cvd);
|
||||
/*
|
||||
* vdev sync load and sync
|
||||
*/
|
||||
extern void vdev_load_log_state(vdev_t *vd, nvlist_t *nv);
|
||||
extern void vdev_load(vdev_t *vd);
|
||||
extern void vdev_sync(vdev_t *vd, uint64_t txg);
|
||||
extern void vdev_sync_done(vdev_t *vd, uint64_t txg);
|
||||
|
@ -366,9 +366,9 @@ extern uint64_t zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx);
|
||||
|
||||
extern void zil_commit(zilog_t *zilog, uint64_t seq, uint64_t oid);
|
||||
|
||||
extern int zil_vdev_offline(char *osname, void *txarg);
|
||||
extern int zil_claim(char *osname, void *txarg);
|
||||
extern int zil_check_log_chain(char *osname, void *txarg);
|
||||
extern int zil_clear_log_chain(char *osname, void *txarg);
|
||||
extern void zil_sync(zilog_t *zilog, dmu_tx_t *tx);
|
||||
extern void zil_clean(zilog_t *zilog);
|
||||
extern int zil_is_committed(zilog_t *zilog);
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <sys/zap.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zil.h>
|
||||
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, vdev, CTLFLAG_RW, 0, "ZFS VDEV");
|
||||
@ -765,6 +766,15 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
|
||||
if (vd->vdev_ms_shift == 0) /* not being allocated from yet */
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* Compute the raidz-deflation ratio. Note, we hard-code
|
||||
* in 128k (1 << 17) because it is the current "typical" blocksize.
|
||||
* Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
|
||||
* or we will inconsistently account for existing bp's.
|
||||
*/
|
||||
vd->vdev_deflate_ratio = (1 << 17) /
|
||||
(vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
|
||||
|
||||
ASSERT(oldc <= newc);
|
||||
|
||||
if (vd->vdev_islog)
|
||||
@ -998,6 +1008,8 @@ vdev_open(vdev_t *vd)
|
||||
vd->vdev_state == VDEV_STATE_OFFLINE);
|
||||
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_NONE;
|
||||
vd->vdev_cant_read = B_FALSE;
|
||||
vd->vdev_cant_write = B_FALSE;
|
||||
|
||||
if (!vd->vdev_removed && vd->vdev_faulted) {
|
||||
ASSERT(vd->vdev_children == 0);
|
||||
@ -1112,18 +1124,6 @@ vdev_open(vdev_t *vd)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* If this is a top-level vdev, compute the raidz-deflation
|
||||
* ratio. Note, we hard-code in 128k (1<<17) because it is the
|
||||
* current "typical" blocksize. Even if SPA_MAXBLOCKSIZE
|
||||
* changes, this algorithm must never change, or we will
|
||||
* inconsistently account for existing bp's.
|
||||
*/
|
||||
if (vd->vdev_top == vd) {
|
||||
vd->vdev_deflate_ratio = (1<<17) /
|
||||
(vdev_psize_to_asize(vd, 1<<17) >> SPA_MINBLOCKSHIFT);
|
||||
}
|
||||
|
||||
/*
|
||||
* If a leaf vdev has a DTL, and seems healthy, then kick off a
|
||||
* resilver. But don't do this if we are doing a reopen for a scrub,
|
||||
@ -1937,7 +1937,8 @@ vdev_online(spa_t *spa, uint64_t guid, uint64_t flags, vdev_state_t *newstate)
|
||||
int
|
||||
vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
|
||||
{
|
||||
vdev_t *vd;
|
||||
vdev_t *vd, *tvd;
|
||||
int error;
|
||||
|
||||
spa_vdev_state_enter(spa);
|
||||
|
||||
@ -1947,34 +1948,58 @@ vdev_offline(spa_t *spa, uint64_t guid, uint64_t flags)
|
||||
if (!vd->vdev_ops->vdev_op_leaf)
|
||||
return (spa_vdev_state_exit(spa, NULL, ENOTSUP));
|
||||
|
||||
tvd = vd->vdev_top;
|
||||
|
||||
/*
|
||||
* If the device isn't already offline, try to offline it.
|
||||
*/
|
||||
if (!vd->vdev_offline) {
|
||||
/*
|
||||
* If this device has the only valid copy of some data,
|
||||
* don't allow it to be offlined.
|
||||
* don't allow it to be offlined. Log devices are always
|
||||
* expendable.
|
||||
*/
|
||||
if (vd->vdev_aux == NULL && vdev_dtl_required(vd))
|
||||
if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
|
||||
vdev_dtl_required(vd))
|
||||
return (spa_vdev_state_exit(spa, NULL, EBUSY));
|
||||
|
||||
/*
|
||||
* Offline this device and reopen its top-level vdev.
|
||||
* If this action results in the top-level vdev becoming
|
||||
* unusable, undo it and fail the request.
|
||||
* If the top-level vdev is a log device then just offline
|
||||
* it. Otherwise, if this action results in the top-level
|
||||
* vdev becoming unusable, undo it and fail the request.
|
||||
*/
|
||||
vd->vdev_offline = B_TRUE;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
if (vd->vdev_aux == NULL && vdev_is_dead(vd->vdev_top)) {
|
||||
vdev_reopen(tvd);
|
||||
|
||||
if (!tvd->vdev_islog && vd->vdev_aux == NULL &&
|
||||
vdev_is_dead(tvd)) {
|
||||
vd->vdev_offline = B_FALSE;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
vdev_reopen(tvd);
|
||||
return (spa_vdev_state_exit(spa, NULL, EBUSY));
|
||||
}
|
||||
}
|
||||
|
||||
vd->vdev_tmpoffline = !!(flags & ZFS_OFFLINE_TEMPORARY);
|
||||
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
if (!tvd->vdev_islog || !vdev_is_dead(tvd))
|
||||
return (spa_vdev_state_exit(spa, vd, 0));
|
||||
|
||||
(void) spa_vdev_state_exit(spa, vd, 0);
|
||||
|
||||
error = dmu_objset_find(spa_name(spa), zil_vdev_offline,
|
||||
NULL, DS_FIND_CHILDREN);
|
||||
if (error) {
|
||||
(void) vdev_online(spa, guid, 0, NULL);
|
||||
return (error);
|
||||
}
|
||||
/*
|
||||
* If we successfully offlined the log device then we need to
|
||||
* sync out the current txg so that the "stubby" block can be
|
||||
* removed by zil_sync().
|
||||
*/
|
||||
txg_wait_synced(spa->spa_dsl_pool, 0);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2279,6 +2304,7 @@ vdev_space_update(vdev_t *vd, int64_t space_delta, int64_t alloc_delta,
|
||||
* childrens', thus not accurate enough for us.
|
||||
*/
|
||||
ASSERT((dspace_delta & (SPA_MINBLOCKSIZE-1)) == 0);
|
||||
ASSERT(vd->vdev_deflate_ratio != 0 || vd->vdev_isl2cache);
|
||||
dspace_delta = (dspace_delta >> SPA_MINBLOCKSHIFT) *
|
||||
vd->vdev_deflate_ratio;
|
||||
|
||||
@ -2631,11 +2657,7 @@ vdev_set_state(vdev_t *vd, boolean_t isopen, vdev_state_t state, vdev_aux_t aux)
|
||||
boolean_t
|
||||
vdev_is_bootable(vdev_t *vd)
|
||||
{
|
||||
#ifdef __FreeBSD_version
|
||||
return (B_TRUE);
|
||||
#else
|
||||
int c;
|
||||
|
||||
#ifdef sun
|
||||
if (!vd->vdev_ops->vdev_op_leaf) {
|
||||
char *vdev_type = vd->vdev_ops->vdev_op_type;
|
||||
|
||||
@ -2654,6 +2676,35 @@ vdev_is_bootable(vdev_t *vd)
|
||||
if (!vdev_is_bootable(vd->vdev_child[c]))
|
||||
return (B_FALSE);
|
||||
}
|
||||
#endif /* sun */
|
||||
return (B_TRUE);
|
||||
#endif
|
||||
}
|
||||
|
||||
void
|
||||
vdev_load_log_state(vdev_t *vd, nvlist_t *nv)
|
||||
{
|
||||
uint_t c, children;
|
||||
nvlist_t **child;
|
||||
uint64_t val;
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) == 0) {
|
||||
for (c = 0; c < children; c++)
|
||||
vdev_load_log_state(vd->vdev_child[c], child[c]);
|
||||
}
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf && nvlist_lookup_uint64(nv,
|
||||
ZPOOL_CONFIG_OFFLINE, &val) == 0 && val) {
|
||||
|
||||
/*
|
||||
* It would be nice to call vdev_offline()
|
||||
* directly but the pool isn't fully loaded and
|
||||
* the txg threads have not been started yet.
|
||||
*/
|
||||
spa_config_enter(spa, SCL_STATE_ALL, FTAG, RW_WRITER);
|
||||
vd->vdev_offline = val;
|
||||
vdev_reopen(vd->vdev_top);
|
||||
spa_config_exit(spa, SCL_STATE_ALL, FTAG);
|
||||
}
|
||||
}
|
||||
|
@ -25,6 +25,7 @@
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/arc.h>
|
||||
@ -515,6 +516,13 @@ zil_claim(char *osname, void *txarg)
|
||||
zilog = dmu_objset_zil(os);
|
||||
zh = zil_header_in_syncing_context(zilog);
|
||||
|
||||
if (zilog->zl_spa->spa_log_state == SPA_LOG_CLEAR) {
|
||||
if (!BP_IS_HOLE(&zh->zh_log))
|
||||
zio_free_blk(zilog->zl_spa, &zh->zh_log, first_txg);
|
||||
BP_ZERO(&zh->zh_log);
|
||||
dsl_dataset_dirty(dmu_objset_ds(os), tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record here whether the zil has any records to replay.
|
||||
* If the header block pointer is null or the block points
|
||||
@ -527,8 +535,10 @@ zil_claim(char *osname, void *txarg)
|
||||
* Note, the intent log can be empty but still need the
|
||||
* stubby to be claimed.
|
||||
*/
|
||||
if (!zil_empty(zilog))
|
||||
if (!zil_empty(zilog)) {
|
||||
zh->zh_flags |= ZIL_REPLAY_NEEDED;
|
||||
dsl_dataset_dirty(dmu_objset_ds(os), tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Claim all log blocks if we haven't already done so, and remember
|
||||
@ -597,36 +607,6 @@ zil_check_log_chain(char *osname, void *txarg)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear a log chain
|
||||
*/
|
||||
/* ARGSUSED */
|
||||
int
|
||||
zil_clear_log_chain(char *osname, void *txarg)
|
||||
{
|
||||
zilog_t *zilog;
|
||||
zil_header_t *zh;
|
||||
objset_t *os;
|
||||
dmu_tx_t *tx;
|
||||
int error;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
|
||||
if (error) {
|
||||
cmn_err(CE_WARN, "can't open objset for %s", osname);
|
||||
return (0);
|
||||
}
|
||||
|
||||
zilog = dmu_objset_zil(os);
|
||||
tx = dmu_tx_create(zilog->zl_os);
|
||||
(void) dmu_tx_assign(tx, TXG_WAIT);
|
||||
zh = zil_header_in_syncing_context(zilog);
|
||||
BP_ZERO(&zh->zh_log);
|
||||
dsl_dataset_dirty(dmu_objset_ds(os), tx);
|
||||
dmu_tx_commit(tx);
|
||||
dmu_objset_close(os);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zil_vdev_compare(const void *x1, const void *x2)
|
||||
{
|
||||
@ -771,9 +751,9 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
|
||||
}
|
||||
if (lwb->lwb_zio == NULL) {
|
||||
lwb->lwb_zio = zio_rewrite(zilog->zl_root_zio, zilog->zl_spa,
|
||||
0, &lwb->lwb_blk, lwb->lwb_buf,
|
||||
lwb->lwb_sz, zil_lwb_write_done, lwb,
|
||||
ZIO_PRIORITY_LOG_WRITE, ZIO_FLAG_CANFAIL, &zb);
|
||||
0, &lwb->lwb_blk, lwb->lwb_buf, lwb->lwb_sz,
|
||||
zil_lwb_write_done, lwb, ZIO_PRIORITY_LOG_WRITE,
|
||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE, &zb);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1270,12 +1250,7 @@ zil_sync(zilog_t *zilog, dmu_tx_t *tx)
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
lwb = list_head(&zilog->zl_lwb_list);
|
||||
if (lwb == NULL) {
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
return;
|
||||
}
|
||||
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
|
||||
zh->zh_log = lwb->lwb_blk;
|
||||
if (lwb->lwb_buf != NULL || lwb->lwb_max_txg > txg)
|
||||
break;
|
||||
@ -1692,3 +1667,24 @@ out:
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
int
|
||||
zil_vdev_offline(char *osname, void *arg)
|
||||
{
|
||||
objset_t *os;
|
||||
zilog_t *zilog;
|
||||
int error;
|
||||
|
||||
error = dmu_objset_open(osname, DMU_OST_ANY, DS_MODE_USER, &os);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
zilog = dmu_objset_zil(os);
|
||||
if (zil_suspend(zilog) != 0)
|
||||
error = EEXIST;
|
||||
else
|
||||
zil_resume(zilog);
|
||||
dmu_objset_close(os);
|
||||
return (error);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user