asomers 442baa5184 zfsd(8), the ZFS fault management daemon
Add zfsd, which deals with hard drive faults in ZFS pools. It manages
hotspares and replements in drive slots that publish physical paths.

cddl/usr.sbin/zfsd
	Add zfsd(8) and its unit tests

cddl/usr.sbin/Makefile
	Add zfsd to the build

lib/libdevdctl
	A C++ library that helps devd clients process events

lib/Makefile
share/mk/bsd.libnames.mk
share/mk/src.libnames.mk
	Add libdevdctl to the build. It's a private library, unusable by
	out-of-tree software.

etc/defaults/rc.conf
	By default, set zfsd_enable to NO

etc/mtree/BSD.include.dist
	Add a directory for libdevdctl's include files

etc/mtree/BSD.tests.dist
	Add a directory for zfsd's unit tests

etc/mtree/BSD.var.dist
	Add /var/db/zfsd/cases, where zfsd stores case files while it's shut
	down.

etc/rc.d/Makefile
etc/rc.d/zfsd
	Add zfsd's rc script

sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c
	Fix the resource.fs.zfs.statechange message. It had a number of
	problems:

	It was only being emitted on a transition to the HEALTHY state.
	That made it impossible for zfsd to take actions based on drives
	getting sicker.

	It compared the new state to vdev_prevstate, which is the state that
	the vdev had the last time it was opened.  That doesn't make sense,
	because a vdev can change state multiple times without being
	reopened.

	vdev_set_state contains logic that will change the device's new
	state based on various conditions.  However, the statechange event
	was being posted _before_ that logic took effect.  Now it's being
	posted after.

Submitted by:	gibbs, asomers, mav, allanjude
Reviewed by:	mav, delphij
Relnotes:	yes
Sponsored by:	Spectra Logic Corp, iX Systems
Differential Revision:	https://reviews.freebsd.org/D6564
2016-05-28 17:43:40 +00:00

427 lines
12 KiB
C++

/*-
* Copyright (c) 2011, 2012, 2013 Spectra Logic Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* substantially similar to the "NO WARRANTY" disclaimer below
* ("Disclaimer") and any redistribution must be conditioned upon
* including a substantially similar Disclaimer requirement for further
* binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGES.
*
* Authors: Justin T. Gibbs (Spectra Logic Corporation)
*
* $FreeBSD$
*/
/**
* \file case_file.h
*
* CaseFile objects aggregate vdev faults that may require ZFSD action
* in order to maintain the health of a ZFS pool.
*
* Header requirements:
*
* #include <list>
*
* #include "callout.h"
* #include "zfsd_event.h"
*/
#ifndef _CASE_FILE_H_
#define _CASE_FILE_H_
/*=========================== Forward Declarations ===========================*/
class CaseFile;
class Vdev;
/*============================= Class Definitions ============================*/
/*------------------------------- CaseFileList -------------------------------*/
/**
* CaseFileList is a specialization of the standard list STL container.
*/
typedef std::list< CaseFile *> CaseFileList;
/*--------------------------------- CaseFile ---------------------------------*/
/**
* A CaseFile object is instantiated anytime a vdev for an active pool
* experiences an I/O error, is faulted by ZFS, or is determined to be
* missing/removed.
*
* A vdev may have at most one CaseFile.
*
* CaseFiles are retired when a vdev leaves an active pool configuration
* or an action is taken to resolve the issues recorded in the CaseFile.
*
* Logging a case against a vdev does not imply that an immediate action
* to resolve a fault is required or even desired. For example, a CaseFile
* must accumulate a number of I/O errors in order to flag a device as
* degraded.
*
* Vdev I/O errors are not recorded in ZFS label inforamation. For this
* reasons, CaseFile%%s with accumulated I/O error events are serialized
* to the file system so that they survive across boots. Currently all
* other fault types can be reconstructed from ZFS label information, so
* CaseFile%%s for missing, faulted, or degradded members are just recreated
* at ZFSD startup instead of being deserialized from the file system.
*/
class CaseFile
{
public:
/**
* \brief Find a CaseFile object by a vdev's pool/vdev GUID tuple.
*
* \param poolGUID Pool GUID for the vdev of the CaseFile to find.
* \param vdevGUID Vdev GUID for the vdev of the CaseFile to find.
*
* \return If found, a pointer to a valid CaseFile object.
* Otherwise NULL.
*/
static CaseFile *Find(DevdCtl::Guid poolGUID, DevdCtl::Guid vdevGUID);
/**
* \brief Find a CaseFile object by a vdev's current/last known
* physical path.
*
* \param physPath Physical path of the vdev of the CaseFile to find.
*
* \return If found, a pointer to a valid CaseFile object.
* Otherwise NULL.
*/
static CaseFile *Find(const string &physPath);
/**
* \brief ReEvaluate all open cases whose pool guid matches the argument
*
* \param poolGUID Only reevaluate cases for this pool
* \param event Try to consume this event with the casefile
*/
static void ReEvaluateByGuid(DevdCtl::Guid poolGUID,
const ZfsEvent &event);
/**
* \brief Create or return an existing active CaseFile for the
* specified vdev.
*
* \param vdev The vdev object for which to find/create a CaseFile.
*
* \return A reference to a valid CaseFile object.
*/
static CaseFile &Create(Vdev &vdev);
/**
* \brief Deserialize all serialized CaseFile objects found in
* the file system.
*/
static void DeSerialize();
/**
* \brief Emit syslog data on all active CaseFile%%s in the system.
*/
static void LogAll();
/**
* \brief Destroy the in-core cache of CaseFile data.
*
* This routine does not disturb the on disk, serialized, CaseFile
* data.
*/
static void PurgeAll();
DevdCtl::Guid PoolGUID() const;
DevdCtl::Guid VdevGUID() const;
vdev_state VdevState() const;
const string &PoolGUIDString() const;
const string &VdevGUIDString() const;
const string &PhysicalPath() const;
/**
* \brief Attempt to resolve this CaseFile using the disk
* resource at the given device/physical path/vdev object
* tuple.
*
* \param devPath The devfs path for the disk resource.
* \param physPath The physical path information reported by
* the disk resource.
* \param vdev If the disk contains ZFS label information,
* a pointer to the disk label's vdev object
* data. Otherwise NULL.
*
* \return True if this event was consumed by this CaseFile.
*/
bool ReEvaluate(const string &devPath, const string &physPath,
Vdev *vdev);
/**
* \brief Update this CaseFile in light of the provided ZfsEvent.
*
* Must be virtual so it can be overridden in the unit tests
*
* \param event The ZfsEvent to evaluate.
*
* \return True if this event was consumed by this CaseFile.
*/
virtual bool ReEvaluate(const ZfsEvent &event);
/**
* \brief Register an itimer callout for the given event, if necessary
*/
virtual void RegisterCallout(const DevdCtl::Event &event);
/**
* \brief Close a case if it is no longer relevant.
*
* This method deals with cases tracking soft errors. Soft errors
* will be discarded should a remove event occur within a short period
* of the soft errors being reported. We also discard the events
* if the vdev is marked degraded or failed.
*
* \return True if the case is closed. False otherwise.
*/
bool CloseIfSolved();
/**
* \brief Emit data about this CaseFile via syslog(3).
*/
void Log();
/**
* \brief Whether we should degrade this vdev
*/
bool ShouldDegrade() const;
/**
* \brief Whether we should fault this vdev
*/
bool ShouldFault() const;
protected:
enum {
/**
* The number of soft errors on a vdev required
* to transition a vdev from healthy to degraded
* status.
*/
ZFS_DEGRADE_IO_COUNT = 50
};
static CalloutFunc_t OnGracePeriodEnded;
/**
* \brief scandir(3) filter function used to find files containing
* serialized CaseFile data.
*
* \param dirEntry Directory entry for the file to filter.
*
* \return Non-zero for a file to include in the selection,
* otherwise 0.
*/
static int DeSerializeSelector(const struct dirent *dirEntry);
/**
* \brief Given the name of a file containing serialized events from a
* CaseFile object, create/update an in-core CaseFile object
* representing the serialized data.
*
* \param fileName The name of a file containing serialized events
* from a CaseFile object.
*/
static void DeSerializeFile(const char *fileName);
/** Constructor. */
CaseFile(const Vdev &vdev);
/**
* Destructor.
* Must be virtual so it can be subclassed in the unit tests
*/
virtual ~CaseFile();
/**
* \brief Reload state for the vdev associated with this CaseFile.
*
* \return True if the refresh was successful. False if the system
* has no record of the pool or vdev for this CaseFile.
*/
virtual bool RefreshVdevState();
/**
* \brief Free all events in the m_events list.
*/
void PurgeEvents();
/**
* \brief Free all events in the m_tentativeEvents list.
*/
void PurgeTentativeEvents();
/**
* \brief Commit to file system storage.
*/
void Serialize();
/**
* \brief Retrieve event data from a serialization stream.
*
* \param caseStream The serializtion stream to parse.
*/
void DeSerialize(std::ifstream &caseStream);
/**
* \brief Serializes the supplied event list and writes it to fd
*
* \param prefix If not NULL, this prefix will be prepended to
* every event in the file.
*/
void SerializeEvList(const DevdCtl::EventList events, int fd,
const char* prefix=NULL) const;
/**
* \brief Unconditionally close a CaseFile.
*/
virtual void Close();
/**
* \brief Callout callback invoked when the remove timer grace
* period expires.
*
* If no remove events are received prior to the grace period
* firing, then any tentative events are promoted and counted
* against the health of the vdev.
*/
void OnGracePeriodEnded();
/**
* \brief Attempt to activate a spare on this case's pool.
*
* Call this whenever a pool becomes degraded. It will look for any
* spare devices and activate one to replace the casefile's vdev. It
* will _not_ close the casefile; that should only happen when the
* missing drive is replaced or the user promotes the spare.
*
* \return True if a spare was activated
*/
bool ActivateSpare();
/**
* \brief replace a pool's vdev with another
*
* \param vdev_type The type of the new vdev. Usually either
* VDEV_TYPE_DISK or VDEV_TYPE_FILE
* \param path The file system path to the new vdev
* \param isspare Whether the new vdev is a spare
*
* \return true iff the replacement was successful
*/
bool Replace(const char* vdev_type, const char* path, bool isspare);
/**
* \brief Which vdev, if any, is replacing ours.
*
* \param zhp Pool handle state from the caller context
*
* \return the vdev that is currently replacing ours,
* or NonexistentVdev if there isn't one.
*/
Vdev BeingReplacedBy(zpool_handle_t *zhp);
/**
* \brief All CaseFiles being tracked by ZFSD.
*/
static CaseFileList s_activeCases;
/**
* \brief The file system path to serialized CaseFile data.
*/
static const string s_caseFilePath;
/**
* \brief The time ZFSD waits before promoting a tentative event
* into a permanent event.
*/
static const timeval s_removeGracePeriod;
/**
* \brief A list of soft error events counted against the health of
* a vdev.
*/
DevdCtl::EventList m_events;
/**
* \brief A list of soft error events waiting for a grace period
* expiration before being counted against the health of
* a vdev.
*/
DevdCtl::EventList m_tentativeEvents;
DevdCtl::Guid m_poolGUID;
DevdCtl::Guid m_vdevGUID;
vdev_state m_vdevState;
string m_poolGUIDString;
string m_vdevGUIDString;
string m_vdevPhysPath;
/**
* \brief Callout activated when a grace period
*/
Callout m_tentativeTimer;
private:
nvlist_t *CaseVdev(zpool_handle_t *zhp) const;
};
inline DevdCtl::Guid
CaseFile::PoolGUID() const
{
return (m_poolGUID);
}
inline DevdCtl::Guid
CaseFile::VdevGUID() const
{
return (m_vdevGUID);
}
inline vdev_state
CaseFile::VdevState() const
{
return (m_vdevState);
}
inline const string &
CaseFile::PoolGUIDString() const
{
return (m_poolGUIDString);
}
inline const string &
CaseFile::VdevGUIDString() const
{
return (m_vdevGUIDString);
}
inline const string &
CaseFile::PhysicalPath() const
{
return (m_vdevPhysPath);
}
#endif /* _CASE_FILE_H_ */