freebsd-dev/cddl/usr.sbin/zfsd/case_file.cc
Alan Somers f698c1e99b zfsd: add support for hotplugging spares
If you remove an unused spare and then reinsert it, zfsd will now online
it in all pools.

Do not MFC without 2a58b312b6 (but it's ok to MFC that one without this
one).

Submitted by:	Ameer Hamza <ahamza@ixsystems.com> (zfsd), Me (tests)
MFC after:	2 weeks
MFC with:	2a58b312b6
Sponsored by:	iX Systems, Axcient
Pull Request:	https://github.com/freebsd/freebsd-src/pull/697
2023-04-06 11:58:55 -06:00

1196 lines
31 KiB
C++

/*-
* Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* substantially similar to the "NO WARRANTY" disclaimer below
* ("Disclaimer") and any redistribution must be conditioned upon
* including a substantially similar Disclaimer requirement for further
* binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGES.
*
* Authors: Justin T. Gibbs (Spectra Logic Corporation)
*/
/**
* \file case_file.cc
*
* We keep case files for any leaf vdev that is not in the optimal state.
* However, we only serialize to disk those events that need to be preserved
* across reboots. For now, this is just a log of soft errors which we
* accumulate in order to mark a device as degraded.
*/
#include <sys/cdefs.h>
#include <sys/byteorder.h>
#include <sys/time.h>
#include <sys/fs/zfs.h>
#include <dirent.h>
#include <fcntl.h>
#include <iomanip>
#include <fstream>
#include <functional>
#include <sstream>
#include <syslog.h>
#include <unistd.h>
#include <libzfs.h>
#include <list>
#include <map>
#include <string>
#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>
#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"
__FBSDID("$FreeBSD$");
/*============================ Namespace Control =============================*/
using std::hex;
using std::ifstream;
using std::stringstream;
using std::setfill;
using std::setw;
using DevdCtl::Event;
using DevdCtl::EventFactory;
using DevdCtl::EventList;
using DevdCtl::Guid;
using DevdCtl::ParseException;
/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------
CaseFileList CaseFile::s_activeCases;
const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
//- CaseFile Static Public Methods ---------------------------------------------
/*
 * Look up the active case for the given pool/vdev GUID pair.
 *
 * An invalid pool GUID acts as a wildcard: only the vdev GUID is compared.
 * Returns the first matching case, or NULL when no active case matches.
 */
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
	/* Only filter on the pool when the caller named a specific one. */
	const bool specificPool(Guid::InvalidGuid() != poolGUID);

	for (CaseFileList::iterator it = s_activeCases.begin();
	     it != s_activeCases.end(); ++it) {
		CaseFile *candidate(*it);

		if (specificPool && candidate->PoolGUID() != poolGUID)
			continue;
		if (candidate->VdevGUID() != vdevGUID)
			continue;

		/* We only carry one active case per-vdev. */
		return (candidate);
	}
	return (NULL);
}
/*
 * Collect every active case matching the given pool/vdev GUID pair into
 * "cases".
 *
 * An invalid pool GUID acts as a wildcard.  The scan stops after the first
 * match that is not a spare; spare vdevs may legitimately have several cases.
 */
void
CaseFile::Find(Guid poolGUID, Guid vdevGUID, CaseFileList &cases)
{
	const bool specificPool(Guid::InvalidGuid() != poolGUID);

	for (CaseFileList::iterator it = s_activeCases.begin();
	     it != s_activeCases.end(); ++it) {
		CaseFile *candidate(*it);

		if (specificPool && candidate->PoolGUID() != poolGUID)
			continue;
		if (candidate->VdevGUID() != vdevGUID)
			continue;

		/* We can have multiple cases for spare vdevs. */
		cases.push_back(candidate);
		if (!candidate->IsSpare())
			return;
	}
}
/*
 * Look up the active case whose vdev has the given physical path.
 *
 * Every active case is examined.  If more than one matches, a warning is
 * logged for each additional match and the last match is returned.  Returns
 * NULL when nothing matches.
 */
CaseFile *
CaseFile::Find(const string &physPath)
{
	CaseFile *match = NULL;

	for (CaseFileList::iterator it = s_activeCases.begin();
	     it != s_activeCases.end(); ++it) {
		CaseFile *candidate(*it);

		if (candidate->PhysicalPath() != physPath)
			continue;

		if (match != NULL) {
			syslog(LOG_WARNING, "Multiple casefiles found for "
			    "physical path %s. "
			    "This is most likely a bug in zfsd",
			    physPath.c_str());
		}
		match = candidate;
	}
	return (match);
}
/*
 * Offer the given event to every active case that belongs to poolGUID.
 */
void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
	CaseFileList::iterator cursor(s_activeCases.begin());

	while (cursor != s_activeCases.end()) {
		CaseFile *current(*cursor);

		/*
		 * Step past the current entry before re-evaluating it:
		 * re-evaluation may close (and delete) the case, which
		 * removes it from the list.
		 */
		++cursor;
		if (poolGUID == current->PoolGUID())
			current->ReEvaluate(event);
	}
}
/*
 * Return the active case for the given vdev, creating one if none exists.
 *
 * A newly created case registers itself in the active case list from its
 * constructor.
 */
CaseFile &
CaseFile::Create(Vdev &vdev)
{
	CaseFile *existing(Find(vdev.PoolGUID(), vdev.GUID()));

	if (existing != NULL)
		return (*existing);
	return (*new CaseFile(vdev));
}
/*
 * Load every serialized case file found in the case file directory.
 *
 * Directory entries are filtered by DeSerializeSelector and each accepted
 * entry is handed to DeSerializeFile.
 */
void
CaseFile::DeSerialize()
{
	struct dirent **entries;
	const int entryCount = scandir(s_caseFilePath.c_str(), &entries,
	    DeSerializeSelector, /*compar*/NULL);

	/* Nothing was allocated if the scan itself failed. */
	if (entryCount == -1)
		return;

	for (int idx = 0; idx < entryCount; idx++) {
		DeSerializeFile(entries[idx]->d_name);
		free(entries[idx]);
	}
	/* The entry array is ours to release, even when it is empty. */
	free(entries);
}
bool
CaseFile::Empty()
{
return (s_activeCases.empty());
}
void
CaseFile::LogAll()
{
for (CaseFileList::iterator curCase = s_activeCases.begin();
curCase != s_activeCases.end(); curCase++)
(*curCase)->Log();
}
void
CaseFile::PurgeAll()
{
/*
* Serialize casefiles before deleting them so that they can be reread
* and revalidated during BuildCaseFiles.
* CaseFiles remove themselves from this list on destruction.
*/
while (s_activeCases.size() != 0) {
CaseFile *casefile = s_activeCases.front();
casefile->Serialize();
delete casefile;
}
}
/*
 * Return the spare flag captured from the vdev when this case was created.
 */
int
CaseFile::IsSpare()
{
	return m_is_spare;
}
//- CaseFile Public Methods ----------------------------------------------------
bool
CaseFile::RefreshVdevState()
{
ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
if (casePool == NULL)
return (false);
Vdev vd(casePool, CaseVdev(casePool));
if (vd.DoesNotExist())
return (false);
m_vdevState = vd.State();
m_vdevPhysPath = vd.PhysicalPath();
return (true);
}
/*
 * Re-evaluate this case in light of a newly arrived device.
 *
 * devPath   Device node path of the arrived device.
 * physPath  Physical path of the arrived device.
 * vdev      Vdev label information read from the device, or NULL.
 *
 * If the device carries this case's vdev, it is brought back online.
 * Otherwise, if the pool has autoreplace set and the device sits at this
 * case's physical path, it is labeled and used to replace the case's vdev.
 *
 * Returns true if the arrival was consumed by this case.
 */
bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

	if (pool == NULL || !RefreshVdevState()) {
		/*
		 * The pool or vdev for this case file is no longer
		 * part of the configuration.  This can happen
		 * if we process a device arrival notification
		 * before seeing the ZFS configuration change
		 * event.
		 */
		syslog(LOG_INFO,
		    "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. "
		    "Closing\n",
		    PoolGUIDString().c_str(),
		    VdevGUIDString().c_str());
		Close();

		/*
		 * Since this event was not used to close this
		 * case, do not report it as consumed.
		 */
		return (/*consumed*/false);
	}

	if (VdevState() > VDEV_STATE_CANT_OPEN) {
		/*
		 * For now, newly discovered devices only help for
		 * devices that are missing.  In the future, we might
		 * use a newly inserted spare to replace a degraded
		 * or faulted device.
		 */
		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
		return (/*consumed*/false);
	}

	/* Does the arrived device carry the very vdev this case tracks? */
	const bool isCaseVdev(vdev != NULL
	    && (vdev->PoolGUID() == m_poolGUID
	     || vdev->PoolGUID() == Guid::InvalidGuid())
	    && vdev->GUID() == m_vdevGUID);

	if (isCaseVdev) {
		int onlineFlags = ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE;

		if (IsSpare())
			onlineFlags |= ZFS_ONLINE_SPARE;

		/* On success this also refreshes our cached vdev state. */
		if (zpool_vdev_online(pool, vdev->GUIDString().c_str(),
		    onlineFlags, &m_vdevState) != 0) {
			syslog(LOG_ERR,
			    "Failed to online vdev(%s/%s:%s): %s: %s\n",
			    zpool_get_name(pool), vdev->GUIDString().c_str(),
			    devPath.c_str(), libzfs_error_action(g_zfsHandle),
			    libzfs_error_description(g_zfsHandle));
			return (/*consumed*/false);
		}

		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n",
		    zpool_get_name(pool), vdev->GUIDString().c_str(),
		    devPath.c_str(),
		    zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

		/*
		 * Check the vdev state post the online action to see
		 * if we can retire this case.
		 */
		CloseIfSolved();

		return (/*consumed*/true);
	}

	/*
	 * If the auto-replace policy is enabled, and we have physical
	 * path information, try a physical path replacement.
	 */
	const char *stateName(zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
		syslog(LOG_INFO,
		    "CaseFile(%s:%s:%s): AutoReplace not set. "
		    "Ignoring device insertion.\n",
		    PoolGUIDString().c_str(),
		    VdevGUIDString().c_str(),
		    stateName);
		return (/*consumed*/false);
	}

	if (PhysicalPath().empty()) {
		syslog(LOG_INFO,
		    "CaseFile(%s:%s:%s): No physical path information. "
		    "Ignoring device insertion.\n",
		    PoolGUIDString().c_str(),
		    VdevGUIDString().c_str(),
		    stateName);
		return (/*consumed*/false);
	}

	if (physPath != PhysicalPath()) {
		syslog(LOG_INFO,
		    "CaseFile(%s:%s:%s): Physical path mismatch. "
		    "Ignoring device insertion.\n",
		    PoolGUIDString().c_str(),
		    VdevGUIDString().c_str(),
		    stateName);
		return (/*consumed*/false);
	}

	/* Write a label on the newly inserted disk. */
	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
		syslog(LOG_ERR,
		    "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
		    zpool_get_name(pool), VdevGUIDString().c_str(),
		    libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
		return (/*consumed*/false);
	}

	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	    devPath.c_str());

	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}
bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
bool consumed(false);
if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
/*
* The Vdev we represent has been removed from the
* configuration. This case is no longer of value.
*/
Close();
return (/*consumed*/true);
} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
/* This Pool has been destroyed. Discard the case */
Close();
return (/*consumed*/true);
} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
RefreshVdevState();
if (VdevState() < VDEV_STATE_HEALTHY)
consumed = ActivateSpare();
}
if (event.Value("class") == "resource.fs.zfs.removed") {
bool spare_activated;
if (!RefreshVdevState()) {
/*
* The pool or vdev for this case file is no longer
* part of the configuration. This can happen
* if we process a device arrival notification
* before seeing the ZFS configuration change
* event.
*/
syslog(LOG_INFO,
"CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
"unconfigured. Closing\n",
PoolGUIDString().c_str(),
VdevGUIDString().c_str());
/*
* Close the case now so we won't waste cycles in the
* system rescan
*/
Close();
/*
* Since this event was not used to close this
* case, do not report it as consumed.
*/
return (/*consumed*/false);
}
/*
* Discard any tentative I/O error events for
* this case. They were most likely caused by the
* hot-unplug of this device.
*/
PurgeTentativeEvents();
/* Try to activate spares if they are available */
spare_activated = ActivateSpare();
/*
* Rescan the drives in the system to see if a recent
* drive arrival can be used to solve this case.
*/
ZfsDaemon::RequestSystemRescan();
/*
* Consume the event if we successfully activated a spare.
* Otherwise, leave it in the unconsumed events list so that the
* future addition of a spare to this pool might be able to
* close the case
*/
consumed = spare_activated;
} else if (event.Value("class") == "resource.fs.zfs.statechange") {
RefreshVdevState();
/*
* If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
* activate a hotspare. Otherwise, ignore the event
*/
if (VdevState() == VDEV_STATE_FAULTED ||
VdevState() == VDEV_STATE_DEGRADED ||
VdevState() == VDEV_STATE_CANT_OPEN)
(void) ActivateSpare();
consumed = true;
}
else if (event.Value("class") == "ereport.fs.zfs.io" ||
event.Value("class") == "ereport.fs.zfs.checksum") {
m_tentativeEvents.push_front(event.DeepCopy());
RegisterCallout(event);
consumed = true;
}
bool closed(CloseIfSolved());
return (consumed || closed);
}
/*
 * Find the vdev configuration that directly contains the vdev with the
 * given GUID.
 *
 * The tree rooted at "config" is searched depth-first.  Returns the nvlist
 * of the parent vdev, or NULL if no vdev with that GUID lies below "config".
 */
static nvlist_t*
find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
{
	nvlist_t **kids;
	unsigned kidCount;

	if (nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
	    &kids, &kidCount) != 0)
		return (NULL);
	if (kidCount == 0)
		return (NULL);

	for (unsigned idx = 0; idx < kidCount; idx++) {
		Vdev kid(pool_config, kids[idx]);

		/* A direct child matches: "config" is the parent. */
		if (kid.GUID() == child_guid)
			return (config);

		/* Otherwise look further down this child's subtree. */
		nvlist *found(find_parent(pool_config, kids[idx], child_guid));
		if (found != NULL)
			return (found);
	}
	return (NULL);
}
/*
 * Try to replace this case's vdev with one of the pool's hot spares.
 *
 * The first spare that is healthy and not already in use is selected.
 * Members of a "replacing" vdev are never spared.
 *
 * Returns the result of the Replace() attempt, or false if no spare could be
 * selected or the pool configuration could not be read.
 */
bool
CaseFile::ActivateSpare() {
	nvlist_t *config, *nvroot;
	nvlist_t **spares;
	const char *devPath, *vdev_type;
	u_int nspares, i;
	int error;

	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		    "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
		return (false);
	}

	const char *poolname(zpool_get_name(zhp));

	config = zpool_get_config(zhp, NULL);
	if (config == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		    "config for pool %s", poolname);
		return (false);
	}

	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
	if (error != 0){
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
		    "tree for pool %s", poolname);
		return (false);
	}

	nvlist_t *parent_config(find_parent(config, nvroot, m_vdevGUID));
	if (parent_config != NULL) {
		const char *parent_type;

		/*
		 * Don't activate spares for members of a "replacing" vdev.
		 * They're already dealt with.  Sparing them will just drag
		 * out the resilver process.
		 */
		error = nvlist_lookup_string(parent_config,
		    ZPOOL_CONFIG_TYPE, &parent_type);
		if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
			return (false);
	}

	/* A failed lookup leaves nspares at zero. */
	nspares = 0;
	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
	    &nspares);
	if (nspares == 0) {
		/* The pool has no spares configured */
		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
		    "No spares available for pool %s", poolname);
		return (false);
	}

	/* Scan for the first spare that is healthy and not in use. */
	for (i = 0; i < nspares; i++) {
		uint64_t *rawStats;
		uint_t nstats;

		if (nvlist_lookup_uint64_array(spares[i],
		    ZPOOL_CONFIG_VDEV_STATS, &rawStats, &nstats) != 0) {
			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
			    "find vdev stats for pool %s, spare %d",
			    poolname, i);
			return (false);
		}

		const vdev_stat_t *vs(reinterpret_cast<vdev_stat_t *>(rawStats));
		const bool inUse(vs->vs_aux == VDEV_AUX_SPARED);
		const bool healthy(vs->vs_state == VDEV_STATE_HEALTHY);

		if (!inUse && healthy) {
			/* We found a usable spare */
			break;
		}
	}

	if (i == nspares) {
		/* No available spares were found */
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		    "the path of pool %s, spare %d. Error %d",
		    poolname, i, error);
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		    "the vdev type of pool %s, spare %d. Error %d",
		    poolname, i, error);
		return (false);
	}

	return (Replace(vdev_type, devPath, /*isspare*/true));
}
/*
 * Arm the tentative-event timer so that it fires once the grace period,
 * measured from the event's timestamp, has expired.
 *
 * A pending timer is only re-armed when the new expiry is sooner than the
 * time it has remaining.
 */
void
CaseFile::RegisterCallout(const Event &event)
{
	timeval currentTime, eventAge, delay, noTime;

	gettimeofday(&currentTime, 0);
	timeval eventTime = event.GetTimestamp();

	/* delay = grace period - (now - event time) */
	timersub(&currentTime, &eventTime, &eventAge);
	timersub(&s_removeGracePeriod, &eventAge, &delay);

	/*
	 * If the delay is <= zero, reset it to the
	 * smallest positive time value instead
	 */
	timerclear(&noTime);
	if (timercmp(&delay, &noTime, <=)) {
		timerclear(&delay);
		delay.tv_usec = 1;
	}

	timeval pendingDelay = m_tentativeTimer.TimeRemaining();
	const bool rearm(!m_tentativeTimer.IsPending()
	    || timercmp(&delay, &pendingDelay, <));

	if (rearm)
		m_tentativeTimer.Reset(delay, OnGracePeriodEnded, this);
}
/*
 * Close this case if there is nothing left to track.
 *
 * A case with no events (tentative or otherwise) is closed when its vdev is
 * healthy; for any other vdev state the case is kept open and re-serialized.
 * A case that still holds events is left untouched.
 *
 * Returns true if the case was closed.  The object no longer exists in that
 * event and must not be used by the caller.
 */
bool
CaseFile::CloseIfSolved()
{
	/* Outstanding events mean the case is still being worked. */
	if (!m_events.empty() || !m_tentativeEvents.empty())
		return (false);

	/*
	 * We currently do not track or take actions on
	 * devices in the degraded or faulted state.
	 * Once we have support for spare pools, we'll
	 * retain these cases so that any spares added in
	 * the future can be applied to them.
	 */
	if (VdevState() == VDEV_STATE_HEALTHY) {
		/* No need to keep cases for healthy vdevs */
		Close();
		return (true);
	}

	/*
	 * Every other state keeps the case open:
	 *  - REMOVED / CANT_OPEN: we may solve it with a newly inserted
	 *    device.
	 *  - FAULTED / DEGRADED: we may solve it with the future addition
	 *    of a spare to the pool.
	 *  - UNKNOWN / CLOSED / OFFLINE: this may not be the correct
	 *    behavior, but it's what we've always done.
	 *
	 * Re-serialize the case in order to remove any
	 * previous event data.
	 */
	Serialize();
	return (false);
}
void
CaseFile::Log()
{
syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
VdevGUIDString().c_str(), PhysicalPath().c_str());
syslog(LOG_INFO, "\tVdev State = %s\n",
zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
if (m_tentativeEvents.size() != 0) {
syslog(LOG_INFO, "\t=== Tentative Events ===\n");
for (EventList::iterator event(m_tentativeEvents.begin());
event != m_tentativeEvents.end(); event++)
(*event)->Log(LOG_INFO);
}
if (m_events.size() != 0) {
syslog(LOG_INFO, "\t=== Events ===\n");
for (EventList::iterator event(m_events.begin());
event != m_events.end(); event++)
(*event)->Log(LOG_INFO);
}
}
//- CaseFile Static Protected Methods ------------------------------------------
void
CaseFile::OnGracePeriodEnded(void *arg)
{
CaseFile &casefile(*static_cast<CaseFile *>(arg));
casefile.OnGracePeriodEnded();
}
/*
 * scandir(3) filter: accept only regular files whose name follows the
 * "pool_<guid>_vdev_<guid>.case" pattern.
 *
 * Returns 1 to accept the entry and 0 to reject it.
 */
int
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
{
	if (dirEntry->d_type != DT_REG)
		return (0);

	uint64_t poolGUID;
	uint64_t vdevGUID;
	const int fieldsParsed = sscanf(dirEntry->d_name,
	    "pool_%" PRIu64 "_vdev_%" PRIu64 ".case", &poolGUID, &vdevGUID);

	return (fieldsParsed == 2 ? 1 : 0);
}
/*
 * Load one serialized case file into an active case.
 *
 * fileName  Name of the file, relative to the case file directory.
 *
 * The pool and vdev GUIDs are parsed out of the file name.  If an active
 * case already exists for that vdev, the file's events are added to it;
 * otherwise a new case is created, provided the vdev is still a member of
 * an imported pool.  Stale or unparsable files are removed from disk.
 *
 * All filesystem operations use the full path of the file: fileName alone
 * would be resolved against the daemon's working directory rather than
 * the case file directory.
 */
void
CaseFile::DeSerializeFile(const char *fileName)
{
	string fullName(s_caseFilePath + '/' + fileName);
	CaseFile *existingCaseFile(NULL);
	CaseFile *caseFile(NULL);

	try {
		uint64_t poolGUID;
		uint64_t vdevGUID;
		nvlist_t *vdevConf;

		if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
		    &poolGUID, &vdevGUID) != 2) {
			throw ZfsdException("CaseFile::DeSerialize: "
			    "Unintelligible CaseFile filename %s.\n", fileName);
		}
		existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
		if (existingCaseFile != NULL) {
			/*
			 * If the vdev is already degraded or faulted,
			 * there's no point in keeping the state around
			 * that we use to put a drive into the degraded
			 * state.  However, if the vdev is simply missing,
			 * preserve the case data in the hopes that it will
			 * return.
			 */
			caseFile = existingCaseFile;
			vdev_state curState(caseFile->VdevState());
			if (curState > VDEV_STATE_CANT_OPEN
			 && curState < VDEV_STATE_HEALTHY) {
				unlink(fullName.c_str());
				return;
			}
		} else {
			ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
			if (zpl.empty()
			 || (vdevConf = VdevIterator(zpl.front())
			    .Find(vdevGUID)) == NULL) {
				/*
				 * Either the pool no longer exists
				 * or this vdev is no longer a member of
				 * the pool.
				 */
				unlink(fullName.c_str());
				return;
			}

			/*
			 * Any vdev we find that does not have a case file
			 * must be in the healthy state and thus worthy of
			 * continued SERD data tracking.
			 */
			caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
		}

		ifstream caseStream(fullName.c_str());
		if (!caseStream)
			throw ZfsdException("CaseFile::DeSerialize: Unable to "
			    "read %s.\n", fileName);

		caseFile->DeSerialize(caseStream);
	} catch (const ParseException &exp) {

		exp.Log();
		/* Only discard a case that was created by this call. */
		if (caseFile != existingCaseFile)
			delete caseFile;

		/*
		 * Since we can't parse the file, unlink it so we don't
		 * trip over it again.
		 */
		unlink(fullName.c_str());
	} catch (const ZfsdException &zfsException) {

		zfsException.Log();
		/* Only discard a case that was created by this call. */
		if (caseFile != existingCaseFile)
			delete caseFile;
	}
}
//- CaseFile Protected Methods -------------------------------------------------
/*
 * Construct a case for the given vdev.
 *
 * The vdev's identity, state, physical path and spare flag are captured,
 * printable forms of both GUIDs are cached, and the new case is appended to
 * the active case list and logged.
 */
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath()),
   m_is_spare(vdev.IsSpare())
{
	/* Pre-render the GUIDs; they are used in nearly every log message. */
	stringstream vdevGuidStream;
	vdevGuidStream << m_vdevGUID;
	m_vdevGUIDString = vdevGuidStream.str();

	stringstream poolGuidStream;
	poolGuidStream << m_poolGUID;
	m_poolGUIDString = poolGuidStream.str();

	s_activeCases.push_back(this);

	syslog(LOG_INFO, "Creating new CaseFile:\n");
	Log();
}
/*
 * Destroy this case: release all owned events, stop the tentative-event
 * timer, and remove the case from the active case list.
 */
CaseFile::~CaseFile()
{
	/* Free the Event objects held by both event lists. */
	PurgeEvents();
	PurgeTentativeEvents();
	/* The timer was armed with "this" as its argument; stop it. */
	m_tentativeTimer.Stop();
	/* Unregister from the list of active cases. */
	s_activeCases.remove(this);
}
/*
 * Delete every event in the regular event list and leave the list empty.
 */
void
CaseFile::PurgeEvents()
{
	while (!m_events.empty()) {
		delete m_events.front();
		m_events.pop_front();
	}
}
/*
 * Delete every event in the tentative event list and leave the list empty.
 */
void
CaseFile::PurgeTentativeEvents()
{
	while (!m_tentativeEvents.empty()) {
		delete m_tentativeEvents.front();
		m_tentativeEvents.pop_front();
	}
}
void
CaseFile::SerializeEvList(const EventList events, int fd,
const char* prefix) const
{
if (events.empty())
return;
for (EventList::const_iterator curEvent = events.begin();
curEvent != events.end(); curEvent++) {
const string &eventString((*curEvent)->GetEventString());
// TODO: replace many write(2) calls with a single writev(2)
if (prefix)
write(fd, prefix, strlen(prefix));
write(fd, eventString.c_str(), eventString.length());
}
}
void
CaseFile::Serialize()
{
stringstream saveFile;
saveFile << setfill('0')
<< s_caseFilePath << "/"
<< "pool_" << PoolGUIDString()
<< "_vdev_" << VdevGUIDString()
<< ".case";
if (m_events.empty() && m_tentativeEvents.empty()) {
unlink(saveFile.str().c_str());
return;
}
int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
if (fd == -1) {
syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
saveFile.str().c_str());
return;
}
SerializeEvList(m_events, fd);
SerializeEvList(m_tentativeEvents, fd, "tentative ");
close(fd);
}
/*
 * Read serialized events, one per line, from the given stream and add them
 * to this case.  A line that starts with "tentative " goes to the tentative
 * event list; every other line goes to the regular event list.  The timer is
 * (re)registered for each event that was successfully created.
 *
 * XXX: This method assumes that events may not contain embedded newlines.  If
 * ever events can contain embedded newlines, then CaseFile must switch
 * serialization formats
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
	const string tentFlag("tentative ");
	const EventFactory &factory(ZfsDaemon::Get().GetFactory());

	/* Skip leading whitespace once, then keep it significant. */
	caseStream >> std::noskipws >> std::ws;
	while (caseStream.good()) {
		/* Pull in one line, leaving the delimiter in the stream. */
		std::stringbuf lineBuf;
		caseStream.get(lineBuf);
		caseStream.ignore(); /*discard the newline character*/

		string line(lineBuf.str());
		EventList *destEvents(&m_events);

		if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
			/* Discard "tentative" */
			line.erase(0, tentFlag.size());
			destEvents = &m_tentativeEvents;
		}

		Event *parsed(Event::CreateEvent(factory, line));
		if (parsed == NULL)
			continue;

		destEvents->push_back(parsed);
		RegisterCallout(*parsed);
	}
}
/*
 * Close this case: log the closure, remove its serialization file, and
 * destroy the object.
 *
 * The object deletes itself, so neither the caller nor any code after the
 * call may touch it once Close() returns.
 */
void
CaseFile::Close()
{
	/*
	 * This case is no longer relevant.  Clean up our
	 * serialization file, and delete the case.
	 */
	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

	/*
	 * Serialization of a Case with no event data, clears the
	 * Serialization data for that event.
	 */
	PurgeEvents();
	Serialize();

	/* Must be the last statement: "this" is invalid afterwards. */
	delete this;
}
/*
 * Grace period expiry handler.
 *
 * The tentative events become regular events.  If the accumulated errors
 * warrant it, the vdev is faulted or degraded and the case is closed;
 * otherwise (or if that action fails) the case is re-serialized.
 */
void
CaseFile::OnGracePeriodEnded()
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());

	/* Promote the tentative events to the front of the event list. */
	m_events.splice(m_events.begin(), m_tentativeEvents);

	const bool should_fault(ShouldFault());
	const bool should_degrade(ShouldDegrade());

	if (!should_fault && !should_degrade) {
		/* No action required yet; just record the promoted events. */
		Serialize();
		return;
	}

	if (zhp == NULL
	 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
		/*
		 * Either the pool no longer exists
		 * or this vdev is no longer a member of
		 * the pool.
		 */
		Close();
		return;
	}

	/* A fault condition has priority over a degrade condition */
	if (should_fault) {
		/* Fault the vdev and close the case. */
		if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
		    VDEV_AUX_ERR_EXCEEDED) == 0) {
			syslog(LOG_INFO, "Faulting vdev(%s/%s)",
			    PoolGUIDString().c_str(),
			    VdevGUIDString().c_str());
			Close();
			return;
		}
		syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
		    PoolGUIDString().c_str(),
		    VdevGUIDString().c_str(),
		    libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
	} else {
		/*
		 * Only the degrade condition holds.
		 * Degrade the vdev and close the case.
		 */
		if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
		    VDEV_AUX_ERR_EXCEEDED) == 0) {
			syslog(LOG_INFO, "Degrading vdev(%s/%s)",
			    PoolGUIDString().c_str(),
			    VdevGUIDString().c_str());
			Close();
			return;
		}
		syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
		    PoolGUIDString().c_str(),
		    VdevGUIDString().c_str(),
		    libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
	}

	/* The fault/degrade attempt failed; keep the case on disk. */
	Serialize();
}
/*
 * Find the vdev, if any, that is standing in for this case's vdev under a
 * "spare" parent.
 *
 * Returns NonexistentVdev when the case vdev's parent does not exist or is
 * not named "spare".  Otherwise returns one of the case vdev's siblings: the
 * last one that is resilvering or healthy, or, failing that, the first
 * sibling encountered.
 */
Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
	Vdev vd(zhp, CaseVdev(zhp));
	Vdev parent(vd.Parent());

	/*
	 * To determine whether we are being replaced by another spare that
	 * is still working, then make sure that it is currently spared and
	 * that the spare is either resilvering or healthy.  If any of these
	 * conditions fail, then we are not being replaced by a spare.
	 *
	 * If the spare is healthy, then the case file should be closed very
	 * soon after this check.
	 */
	if (parent.DoesNotExist()
	 || parent.Name(zhp, /*verbose*/false) != "spare")
		return (NonexistentVdev);

	Vdev replacing(NonexistentVdev);
	std::list<Vdev> siblings(parent.Children());

	for (std::list<Vdev>::iterator it = siblings.begin();
	     it != siblings.end(); ++it) {
		Vdev &sibling(*it);

		/* Skip our vdev. */
		if (sibling.GUID() == VdevGUID())
			continue;

		/*
		 * Accept the first child that doesn't match our GUID, or
		 * any resilvering/healthy device if one exists.
		 */
		if (replacing.DoesNotExist() || sibling.IsResilvering()
		 || sibling.State() == VDEV_STATE_HEALTHY)
			replacing = sibling;
	}

	return (replacing);
}
/*
 * Replace this case's vdev with the device at the given path.
 *
 * vdev_type  Vdev type of the replacement device.
 * path       Device node path of the replacement device.
 * isspare    True when the replacement is a hot spare.
 *
 * When a hot spare is being activated for a vdev that has already been
 * spared, nothing is done if the existing spare is resilvering or healthy;
 * if the existing spare is broken, it is replaced instead of the original
 * vdev.
 *
 * Returns true only if the replacement was successfully started.  Every
 * failure, including failure to build the configuration handed to
 * zpool_vdev_attach, returns false so that callers never treat a
 * replacement that did not happen as a consumed event.
 */
bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
	nvlist_t *nvroot, *newvd;
	const char *poolname;
	string oldstr(VdevGUIDString());
	bool retval = true;

	/* Figure out what pool we're working on */
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	Vdev vd(zhp, CaseVdev(zhp));
	Vdev replaced(BeingReplacedBy(zhp));

	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
		/* If we are already being replaced by a working spare, pass. */
		if (replaced.IsResilvering()
		 || replaced.State() == VDEV_STATE_HEALTHY) {
			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
			    "replaced", VdevGUIDString().c_str(), path);
			return (/*consumed*/false);
		}
		/*
		 * If we have already been replaced by a spare, but that spare
		 * is broken, we must spare the spare, not the original device.
		 */
		oldstr = replaced.GUIDString();
		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
		    "broken spare %s instead", VdevGUIDString().c_str(),
		    path, oldstr.c_str());
	}

	/*
	 * Build a root vdev/leaf vdev configuration suitable for
	 * zpool_vdev_attach. Only enough data for the kernel to find
	 * the device (i.e. type and disk device node path) are needed.
	 */
	nvroot = NULL;
	newvd = NULL;

	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
		    "configuration data.", poolname, oldstr.c_str());
		/* newvd is still NULL here; only nvroot may need freeing. */
		if (nvroot != NULL)
			nvlist_free(nvroot);
		return (false);
	}
	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
				    &newvd, 1) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
		    "configuration data.", poolname, oldstr.c_str());
		nvlist_free(newvd);
		nvlist_free(nvroot);
		/* Nothing was replaced, so do not report success. */
		return (false);
	}

	/* Data was copied when added to the root vdev. */
	nvlist_free(newvd);

	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
	    /*replace*/B_TRUE, /*rebuild*/ B_FALSE) == 0);
	if (retval)
		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
		    poolname, oldstr.c_str(), path);
	else
		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
	nvlist_free(nvroot);

	return (retval);
}
/*
 * Predicate: is the given event's "type" value that of a ZFS checksum error
 * report?
 */
static bool
IsChecksumEvent(const Event* const event)
{
	const bool isChecksum("ereport.fs.zfs.checksum" == event->Value("type"));

	return (isChecksum);
}
/*
 * Predicate: is the given event's "type" value that of a ZFS I/O error
 * report?
 */
static bool
IsIOEvent(const Event* const event)
{
	const bool isIO("ereport.fs.zfs.io" == event->Value("type"));

	return (isIO);
}
bool
CaseFile::ShouldDegrade() const
{
return (std::count_if(m_events.begin(), m_events.end(),
IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}
bool
CaseFile::ShouldFault() const
{
return (std::count_if(m_events.begin(), m_events.end(),
IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}
/*
 * Look up this case's vdev, by GUID, in the given pool.
 *
 * Returns whatever VdevIterator::Find yields for the case's vdev GUID.
 */
nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
	VdevIterator poolVdevs(zhp);

	return (poolVdevs.Find(VdevGUID()));
}