zfsd(8): Close a race condition when onlining a disk paritition

When inserting a partitioned disk, devfs and geom will announce the whole
disk before they announce the partition. If the partition containing ZFS
extends to one of the disk's extents, then zfsd will see a ZFS label on the
whole disk and attempt to online it. ZFS is smart enough to activate the
partition instead of the whole disk, but only if GEOM has already created
the partition's provider.

cddl/contrib/opensolaris/lib/libzfs/common/libzfs.h
cddl/contrib/opensolaris/lib/libzfs/common/libzfs_import.c
	Add a zpool_read_all_labels method. It's similar to
	zpool_read_label, but it will return the number of labels found.

cddl/usr.sbin/zfsd/zfsd_event.cc
	When processing a DevFS CREATE event, only online a VDEV if we can
	read all four ZFS labels.

Reviewed by:	mav
MFC after:	3 weeks
Sponsored by:	Spectra Logic Corp
Differential Revision:	https://reviews.freebsd.org/D11920
This commit is contained in:
asomers 2017-08-24 19:48:41 +00:00
parent 103a56551d
commit c41928d204
3 changed files with 72 additions and 2 deletions

View File

@ -772,6 +772,7 @@ extern int zpool_in_use(libzfs_handle_t *, int, pool_state_t *, char **,
* Label manipulation. * Label manipulation.
*/ */
extern int zpool_read_label(int, nvlist_t **); extern int zpool_read_label(int, nvlist_t **);
extern int zpool_read_all_labels(int, nvlist_t **);
extern int zpool_clear_label(int); extern int zpool_clear_label(int);
/* is this zvol valid for use as a dump device? */ /* is this zvol valid for use as a dump device? */

View File

@ -914,6 +914,65 @@ zpool_read_label(int fd, nvlist_t **config)
return (0); return (0);
} }
/*
* Given a file descriptor, read the label information and return an nvlist
* describing the configuration, if there is one.
* returns the number of valid labels found
*/
int
zpool_read_all_labels(int fd, nvlist_t **config)
{
struct stat64 statbuf;
int l;
vdev_label_t *label;
uint64_t state, txg, size;
int nlabels = 0;
*config = NULL;
if (fstat64(fd, &statbuf) == -1)
return (0);
size = P2ALIGN_TYPED(statbuf.st_size, sizeof (vdev_label_t), uint64_t);
if ((label = malloc(sizeof (vdev_label_t))) == NULL)
return (0);
for (l = 0; l < VDEV_LABELS; l++) {
nvlist_t *temp = NULL;
/* TODO: use aio_read so we can read al 4 labels in parallel */
if (pread64(fd, label, sizeof (vdev_label_t),
label_offset(size, l)) != sizeof (vdev_label_t))
continue;
if (nvlist_unpack(label->vl_vdev_phys.vp_nvlist,
sizeof (label->vl_vdev_phys.vp_nvlist), &temp, 0) != 0)
continue;
if (nvlist_lookup_uint64(temp, ZPOOL_CONFIG_POOL_STATE,
&state) != 0 || state > POOL_STATE_L2CACHE) {
nvlist_free(temp);
temp = NULL;
continue;
}
if (state != POOL_STATE_SPARE && state != POOL_STATE_L2CACHE &&
(nvlist_lookup_uint64(temp, ZPOOL_CONFIG_POOL_TXG,
&txg) != 0 || txg == 0)) {
nvlist_free(temp);
temp = NULL;
continue;
}
if (temp)
*config = temp;
nlabels++;
}
free(label);
return (nlabels);
}
typedef struct rdsk_node { typedef struct rdsk_node {
char *rn_name; char *rn_name;
int rn_dfd; int rn_dfd;

View File

@ -36,6 +36,7 @@
#include <sys/cdefs.h> #include <sys/cdefs.h>
#include <sys/time.h> #include <sys/time.h>
#include <sys/fs/zfs.h> #include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>
#include <syslog.h> #include <syslog.h>
@ -93,6 +94,7 @@ DevfsEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
pool_state_t poolState; pool_state_t poolState;
char *poolName; char *poolName;
boolean_t b_inuse; boolean_t b_inuse;
int nlabels;
inUse = false; inUse = false;
degraded = false; degraded = false;
@ -105,8 +107,16 @@ DevfsEvent::ReadLabel(int devFd, bool &inUse, bool &degraded)
if (poolName != NULL) if (poolName != NULL)
free(poolName); free(poolName);
if (zpool_read_label(devFd, &devLabel) != 0 nlabels = zpool_read_all_labels(devFd, &devLabel);
|| devLabel == NULL) /*
* If we find a disk with fewer than the maximum number of
* labels, it might be the whole disk of a partitioned disk
* where ZFS resides on a partition. In that case, we should do
* nothing and wait for the partition to appear. Or, the disk
* might be damaged. In that case, zfsd should do nothing and
* wait for the sysadmin to decide.
*/
if (nlabels != VDEV_LABELS || devLabel == NULL)
return (NULL); return (NULL);
try { try {