OpenZFS restructuring - libzutil

Factor Linux specific functionality out of libzutil.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matt Macy <mmacy@FreeBSD.org>
Signed-off-by: Ryan Moeller <ryan@ixsystems.com>
Closes #9356
This commit is contained in:
Matthew Macy 2019-10-03 10:20:44 -07:00 committed by Brian Behlendorf
parent e1c216fb0c
commit 7c5eff9400
8 changed files with 1508 additions and 1337 deletions

View File

@ -191,8 +191,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
char rawpath[PATH_MAX], fullpath[PATH_MAX];
char devpath[PATH_MAX];
int ret;
int is_dm = 0;
int is_sd = 0;
boolean_t is_dm = B_FALSE;
boolean_t is_sd = B_FALSE;
uint_t c;
vdev_stat_t *vs;
@ -220,8 +220,8 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
is_dm = zfs_dev_is_dm(path);
zed_log_msg(LOG_INFO, "zfs_process_add: pool '%s' vdev '%s', phys '%s'"
" wholedisk %d, dm %d (%llu)", zpool_get_name(zhp), path,
physpath ? physpath : "NULL", wholedisk, is_dm,
" wholedisk %d, %s dm (guid %llu)", zpool_get_name(zhp), path,
physpath ? physpath : "NULL", wholedisk, is_dm ? "is" : "not",
(long long unsigned int)guid);
/*
@ -266,7 +266,7 @@ zfs_process_add(zpool_handle_t *zhp, nvlist_t *vdev, boolean_t labeled)
* testing)
*/
if (physpath != NULL && strcmp("scsidebug", physpath) == 0)
is_sd = 1;
is_sd = B_TRUE;
/*
* If the pool doesn't have the autoreplace property set, then use

View File

@ -79,15 +79,10 @@ extern const char * const * zpool_default_search_paths(size_t *count);
extern int zpool_read_label(int, nvlist_t **, int *);
extern int zpool_label_disk_wait(const char *, int);
#ifdef HAVE_LIBUDEV
struct udev_device;
extern int zfs_device_get_devid(struct udev_device *, char *, size_t);
extern int zfs_device_get_physical(struct udev_device *, char *, size_t);
#else
#define zfs_device_get_devid(dev, bufptr, buflen) (ENODATA)
#define zfs_device_get_physical(dev, bufptr, buflen) (ENODATA)
#endif
extern void update_vdev_config_dev_strs(nvlist_t *);
@ -106,16 +101,12 @@ extern char *zfs_strip_partition_path(char *);
extern int zfs_strcmp_pathname(const char *, const char *, int);
extern int zfs_dev_is_dm(const char *);
extern int zfs_dev_is_whole_disk(const char *);
extern boolean_t zfs_dev_is_dm(const char *);
extern boolean_t zfs_dev_is_whole_disk(const char *);
extern char *zfs_get_underlying_path(const char *);
extern char *zfs_get_enclosure_sysfs_path(const char *);
#ifdef HAVE_LIBUDEV
extern boolean_t is_mpath_whole_disk(const char *);
#else
#define is_mpath_whole_disk(path) (B_FALSE)
#endif
extern boolean_t zfs_isnumber(const char *);

View File

@ -3,14 +3,23 @@ include $(top_srcdir)/config/Rules.am
# Suppress unused but set variable warnings often due to ASSERTs
AM_CFLAGS += $(NO_UNUSED_BUT_SET_VARIABLE)
DEFAULT_INCLUDES += -I.
noinst_LTLIBRARIES = libzutil.la
USER_C = \
zutil_device_path.c \
zutil_import.c \
zutil_import.h \
zutil_nicenum.c \
zutil_pool.c
if BUILD_LINUX
USER_C += \
os/linux/zutil_device_path_os.c \
os/linux/zutil_import_os.c
endif
nodist_libzutil_la_SOURCES = $(USER_C)
libzutil_la_LIBADD = \

View File

@ -0,0 +1,493 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/efi_partition.h>
#ifdef HAVE_LIBUDEV
#include <libudev.h>
#endif
#include <libzutil.h>
/*
 * Append the first-partition suffix to an otherwise fully qualified
 * whole-disk device path, in place.  This produces the name as it is
 * stored in ZPOOL_CONFIG_PATH for whole disk devices.
 *
 * Paths under UDISK_ROOT or ZVOL_ROOT get "-part1".  Any other path gets
 * "p1" when it ends in a digit and "1" otherwise.
 *
 * path:    NUL-terminated, writable buffer holding the device path
 * max_len: total size of the 'path' buffer in bytes
 *
 * Returns the new string length on success, or -1 when the path is empty
 * or the buffer is too small (the buffer is left untouched in that case).
 */
int
zfs_append_partition(char *path, size_t max_len)
{
	size_t len = strlen(path);
	const char *suffix;

	if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
	    (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
		if (len + 6 >= max_len)
			return (-1);
		suffix = "-part1";
	} else {
		/*
		 * An empty path has no last character to inspect; reject
		 * it rather than reading path[-1].  Room for two characters
		 * is required even when only "1" ends up being appended.
		 */
		if (len == 0 || len + 2 >= max_len)
			return (-1);

		/* ctype functions require an unsigned char value */
		suffix = isdigit((unsigned char)path[len - 1]) ? "p1" : "1";
	}

	(void) strcat(path, suffix);
	return ((int)(len + strlen(suffix)));
}
/*
 * Allocate and return the underlying device name for a device mapper device.
 * If a device mapper device maps to multiple devices, return the first one
 * reported by readdir().
 *
 * For example, dm_name = "/dev/dm-0" could return "/dev/sda".  Symlinks to a
 * DM device (like /dev/disk/by-vdev/A0) are also allowed because the name is
 * resolved with realpath() first.
 *
 * Returns the device name, or NULL on error or no match (including when
 * /sys/block/<name>/slaves/ cannot be opened, i.e. dm_name is not a DM
 * device).
 *
 * NOTE: The returned name string must be *freed*.
 */
static char *
dm_get_underlying_path(const char *dm_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *realp;
	char *slaves_dir = NULL;
	char *path = NULL;
	const char *dev_str;

	if (dm_name == NULL)
		return (NULL);

	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
	realp = realpath(dm_name, NULL);
	if (realp == NULL)
		return (NULL);

	/*
	 * If they preface 'dev' with a path (like "/dev") then strip it off.
	 * We just want the 'dm-N' part.  Without any '/' the whole resolved
	 * name is used.
	 */
	dev_str = strrchr(realp, '/');
	dev_str = (dev_str != NULL) ? dev_str + 1 : realp;

	/* On failure asprintf() leaves its output pointer undefined */
	if (asprintf(&slaves_dir, "/sys/block/%s/slaves/", dev_str) == -1) {
		slaves_dir = NULL;
		goto end;
	}

	dp = opendir(slaves_dir);
	if (dp == NULL)
		goto end;

	/* Return first non-directory entry in /sys/block/dm-N/slaves/ */
	while ((ep = readdir(dp)) != NULL) {
		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
			if (asprintf(&path, "/dev/%s", ep->d_name) == -1)
				path = NULL;
			break;
		}
	}

end:
	if (dp != NULL)
		(void) closedir(dp);
	free(slaves_dir);
	free(realp);
	return (path);
}
/*
 * Report whether 'dev_name' is backed by another block device according to
 * dm_get_underlying_path().
 *
 * Returns B_TRUE when an underlying device was found (device mapper or
 * multipath device) and B_FALSE otherwise.
 */
boolean_t
zfs_dev_is_dm(const char *dev_name)
{
	char *underlying = dm_get_underlying_path(dev_name);
	boolean_t is_dm = (underlying != NULL) ? B_TRUE : B_FALSE;

	/* Only the existence of the name matters; free(NULL) is a no-op */
	free(underlying);
	return (is_dm);
}
/*
 * By "whole disk" we mean an entire physical disk (something we can
 * label, toggle the write cache on, etc.) as opposed to the full
 * capacity of a pseudo-device such as lofi or did.  The check acts as if
 * the disk were about to be labeled: the device is opened read-only with
 * O_DIRECT and an EFI label is allocated and initialized for it.
 *
 * Returns B_TRUE if both steps succeed and B_FALSE otherwise.
 */
boolean_t
zfs_dev_is_whole_disk(const char *dev_name)
{
	struct dk_gpt *label;
	boolean_t whole = B_FALSE;
	int fd = open(dev_name, O_RDONLY | O_DIRECT);

	if (fd < 0)
		return (B_FALSE);

	if (efi_alloc_and_init(fd, EFI_NUMPAR, &label) == 0) {
		/* The label was only needed as a probe */
		efi_free(label);
		whole = B_TRUE;
	}

	(void) close(fd);
	return (whole);
}
/*
* Lookup the underlying device for a device name
*
* Often you'll have a symlink to a device, a partition device,
* or a multipath device, and want to look up the underlying device.
* This function returns the underlying device name. If the device
* name is already the underlying device, then just return the same
* name. If the device is a DM device with multiple underlying devices
* then return the first one.
*
* For example:
*
* 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
* dev_name: /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
* returns: /dev/sda
*
* 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
* dev_name: /dev/mapper/mpatha
* returns: /dev/sda (first device)
*
* 3. /dev/sda (already the underlying device)
* dev_name: /dev/sda
* returns: /dev/sda
*
* 4. /dev/dm-3 (mapped to /dev/sda)
* dev_name: /dev/dm-3
* returns: /dev/sda
*
* 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
* dev_name: /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
* returns: /dev/sdb
*
* 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
* dev_name: /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
* returns: /dev/sda
*
* Returns underlying device name, or NULL on error or no match.
*
* NOTE: The returned name string must be *freed*.
*/
char *
zfs_get_underlying_path(const char *dev_name)
{
	char *resolved;
	char *name;

	if (dev_name == NULL)
		return (NULL);

	/* Try the device mapper lookup first */
	resolved = dm_get_underlying_path(dev_name);
	if (resolved == NULL) {
		/* dev_name not a DM device, so just un-symlinkize it */
		resolved = realpath(dev_name, NULL);
		if (resolved == NULL)
			return (NULL);
	}

	/* Drop any partition suffix; the stripped copy is what we return */
	name = zfs_strip_partition_path(resolved);
	free(resolved);

	return (name);
}
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda); everything up to the last '/' is ignored.
 *
 * For example, disk "sda" in enclosure slot 1:
 *     dev:            "sda"
 *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev' must be a non-devicemapper device.
 *
 * Returns NULL if dev_name is NULL, if /sys/block/<dev>/device cannot be
 * opened, or if no usable "enclosure_device" link is found there.  If
 * several matching links exist, the last one successfully processed wins.
 *
 * Returned string must be freed.
 */
char *
zfs_get_enclosure_sysfs_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	char *devdir = NULL;
	char *link = NULL;
	char *path = NULL;
	const char *slash;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	slash = strrchr(dev_name, '/');
	if (slash != NULL)
		dev_name = slash + 1;	/* +1 since we want the chr after '/' */

	/* On failure asprintf() leaves its output pointer undefined */
	if (asprintf(&devdir, "/sys/block/%s/device", dev_name) == -1) {
		devdir = NULL;
		goto end;
	}

	/* devdir stays allocated here so that 'end' can release it */
	dp = opendir(devdir);
	if (dp == NULL)
		goto end;

	/*
	 * Look through all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp)) != NULL) {
		char *enclosure;
		ssize_t size;

		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		/* Release the name built for a previous matching entry */
		free(link);
		if (asprintf(&link, "%s/%s", devdir, ep->d_name) == -1) {
			link = NULL;
			break;
		}

		size = readlink(link, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (size == -1 || (size_t)size >= sizeof (buf))
			break;

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[size] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		enclosure = strstr(buf, "enclosure");
		if (enclosure == NULL)
			break;

		/* Replace (rather than leak) any earlier result */
		free(path);
		if (asprintf(&path, "/sys/class/%s", enclosure) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
			break;
		}
	}

end:
	free(link);
	free(devdir);

	if (dp != NULL)
		(void) closedir(dp);

	return (path);
}
/*
 * Remove partition suffix from a vdev path.  Partition suffixes may take three
 * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
 * case only occurs when the suffix is preceded by a digit, i.e. "md0p0".  The
 * third case only occurs when preceded by a string matching the regular
 * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
 *
 * The input is never modified.  A copy is returned, shortened only when the
 * candidate suffix is followed by one or more digits running to the end of
 * the string; otherwise the copy is identical to the input.
 *
 * Returns NULL if the copy cannot be allocated.
 * Caller must free the returned string.
 */
char *
zfs_strip_partition(char *path)
{
	char *copy = strdup(path);
	char *cut = NULL;	/* where the string is truncated */
	char *digits = NULL;	/* start of the would-be partition number */

	if (copy == NULL)
		return (NULL);

	/* ctype functions require an unsigned char value, hence the casts */
	if ((cut = strstr(copy, "-part")) != NULL && cut != copy) {
		/* "<name>-partX" */
		digits = cut + 5;
	} else if ((cut = strrchr(copy, 'p')) != NULL && cut > copy + 1 &&
	    isdigit((unsigned char)cut[-1])) {
		/* "<name><digit>pX" */
		digits = cut + 1;
	} else {
		size_t prefix = 0;

		if ((copy[0] == 'h' || copy[0] == 's' || copy[0] == 'v') &&
		    copy[1] == 'd')
			prefix = 2;
		else if (strncmp("xvd", copy, 3) == 0)
			prefix = 3;

		cut = NULL;
		if (prefix != 0) {
			/* skip the drive letters that follow the prefix */
			digits = copy + prefix;
			while (isalpha((unsigned char)*digits))
				digits++;

			/* at least one drive letter is required */
			if (digits != copy + prefix)
				cut = digits;
		}
	}

	/* Only strip when the suffix is followed by digits up to the end */
	if (cut != NULL && digits != NULL && *digits != '\0') {
		while (isdigit((unsigned char)*digits))
			digits++;

		if (*digits == '\0')
			*cut = '\0';
	}

	return (copy);
}
/*
 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname.
 * Only the component after the last '/' is examined; a name without any
 * '/' is processed as a whole.
 *
 * path:	/dev/sda1
 * returns:	/dev/sda
 *
 * Returns NULL if memory cannot be allocated.
 * Returned string must be freed.
 */
char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *sd_offset;
	char *new_sd;

	if (newpath == NULL)
		return (NULL);

	/* Point to "sda1" part of "/dev/sda1" */
	sd_offset = strrchr(newpath, '/');
	sd_offset = (sd_offset != NULL) ? sd_offset + 1 : newpath;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (new_sd == NULL) {
		free(newpath);
		return (NULL);
	}

	/*
	 * Paste the "sda" where "sda1" was.  The stripped name is never
	 * longer than the original, so it always fits.
	 */
	(void) strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
#ifdef HAVE_LIBUDEV
/*
 * A disk is considered a multipath whole disk when all of these hold:
 *	DEVNAME property starts with "/dev/dm-"
 *	ID_PART_TABLE_TYPE property does not exist or is not "gpt"
 *	DM_UUID property exists
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname =
	    udev_device_get_property_value(dev, "DEVNAME");
	const char *type =
	    udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	const char *uuid =
	    udev_device_get_property_value(dev, "DM_UUID");

	/* Must be a device mapper node */
	if (devname == NULL || strncmp(devname, "/dev/dm-", 8) != 0)
		return (B_FALSE);

	/* A GPT partition table means it is not used as a whole disk */
	if (type != NULL && strcmp(type, "gpt") == 0)
		return (B_FALSE);

	if (uuid == NULL)
		return (B_FALSE);

	return (B_TRUE);
}
/*
 * Check if a disk is effectively a multipath whole disk.
 *
 * 'path' is resolved with realpath(); only nodes whose final component
 * starts with "dm-" are looked up in udev and tested with
 * udev_mpath_whole_disk().
 *
 * Returns B_TRUE for a multipath whole disk, B_FALSE otherwise (including
 * when the path cannot be resolved or udev cannot provide the device).
 */
boolean_t
is_mpath_whole_disk(const char *path)
{
	struct udev *udev;
	struct udev_device *dev;
	char nodepath[MAXPATHLEN];
	char *sysname;
	boolean_t wholedisk = B_FALSE;

	if (realpath(path, nodepath) == NULL)
		return (B_FALSE);

	/* realpath() yields an absolute path, so a '/' is always present */
	sysname = strrchr(nodepath, '/') + 1;
	if (strncmp(sysname, "dm-", 3) != 0)
		return (B_FALSE);

	if ((udev = udev_new()) == NULL)
		return (B_FALSE);

	dev = udev_device_new_from_subsystem_sysname(udev, "block", sysname);
	if (dev != NULL) {
		wholedisk = udev_mpath_whole_disk(dev);
		(void) udev_device_unref(dev);
	}

	/* Drop the udev context on every path so it is not leaked */
	(void) udev_unref(udev);

	return (wholedisk);
}
#else /* HAVE_LIBUDEV */
/*
 * Built without libudev: multipath detection is unavailable, so no path
 * is ever reported as a multipath whole disk.
 */
/* ARGSUSED */
boolean_t
is_mpath_whole_disk(const char *path)
{
	(void) path;

	return (B_FALSE);
}
#endif /* HAVE_LIBUDEV */

View File

@ -0,0 +1,856 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright 2015 RackTop Systems.
* Copyright (c) 2016, Intel Corporation.
*/
/*
* Pool import support functions.
*
* Used by zpool, ztest, zdb, and zhack to locate importable configs. Since
* these commands are expected to run in the global zone, we can assume
* that the devices are all readable when called.
*
* To import a pool, we rely on reading the configuration information from the
* ZFS label of each device. If we successfully read the label, then we
* organize the configuration information in the following hierarchy:
*
* pool guid -> toplevel vdev guid -> label txg
*
* Duplicate entries matching this same tuple will be discarded. Once we have
* examined every device, we pick the best label txg config for each toplevel
* vdev. We then arrange these toplevel vdevs into a complete pool config, and
* update any paths that have changed. Finally, we attempt to import the pool
* using our derived config, and record the results.
*/
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <libintl.h>
#include <libgen.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/dktp/fdisk.h>
#include <sys/vdev_impl.h>
#include <sys/fs/zfs.h>
#include <sys/vdev_impl.h>
#include <thread_pool.h>
#include <libzutil.h>
#include <libnvpair.h>
#include "zutil_import.h"
#ifdef HAVE_LIBUDEV
#include <libudev.h>
#include <sched.h>
#endif
#include <blkid/blkid.h>
#define DEFAULT_IMPORT_PATH_SIZE 9
#define DEV_BYID_PATH "/dev/disk/by-id/"
/*
 * Return B_TRUE when 'dev' names a watchdog device: either exactly
 * "watchdog" or "watchdog" immediately followed by a digit (anything may
 * follow the digit).  Return B_FALSE otherwise.
 */
static boolean_t
is_watchdog_dev(char *dev)
{
	static const char prefix[] = "watchdog";
	const size_t prefix_len = sizeof (prefix) - 1;

	if (strncmp(dev, prefix, prefix_len) != 0)
		return (B_FALSE);

	/* For 'watchdog' dev */
	if (dev[prefix_len] == '\0')
		return (B_TRUE);

	/*
	 * For 'watchdog<digit><whatever>'.  ctype functions require an
	 * unsigned char value.
	 */
	if (isdigit((unsigned char)dev[prefix_len]))
		return (B_TRUE);

	return (B_FALSE);
}
void
zpool_open_func(void *arg)
{
rdsk_node_t *rn = arg;
libpc_handle_t *hdl = rn->rn_hdl;
struct stat64 statbuf;
nvlist_t *config;
char *bname, *dupname;
uint64_t vdev_guid = 0;
int error;
int num_labels = 0;
int fd;
/*
* Skip devices with well known prefixes there can be side effects
* when opening devices which need to be avoided.
*
* hpet - High Precision Event Timer
* watchdog - Watchdog must be closed in a special way.
*/
dupname = zutil_strdup(hdl, rn->rn_name);
bname = basename(dupname);
error = ((strcmp(bname, "hpet") == 0) || is_watchdog_dev(bname));
free(dupname);
if (error)
return;
/*
* Ignore failed stats. We only want regular files and block devices.
*/
if (stat64(rn->rn_name, &statbuf) != 0 ||
(!S_ISREG(statbuf.st_mode) && !S_ISBLK(statbuf.st_mode)))
return;
/*
* Preferentially open using O_DIRECT to bypass the block device
* cache which may be stale for multipath devices. An EINVAL errno
* indicates O_DIRECT is unsupported so fallback to just O_RDONLY.
*/
fd = open(rn->rn_name, O_RDONLY | O_DIRECT);
if ((fd < 0) && (errno == EINVAL))
fd = open(rn->rn_name, O_RDONLY);
if ((fd < 0) && (errno == EACCES))
hdl->lpc_open_access_error = B_TRUE;
if (fd < 0)
return;
/*
* This file is too small to hold a zpool
*/
if (S_ISREG(statbuf.st_mode) && statbuf.st_size < SPA_MINDEVSIZE) {
(void) close(fd);
return;
}
error = zpool_read_label(fd, &config, &num_labels);
if (error != 0) {
(void) close(fd);
return;
}
if (num_labels == 0) {
(void) close(fd);
nvlist_free(config);
return;
}
/*
* Check that the vdev is for the expected guid. Additional entries
* are speculatively added based on the paths stored in the labels.
* Entries with valid paths but incorrect guids must be removed.
*/
error = nvlist_lookup_uint64(config, ZPOOL_CONFIG_GUID, &vdev_guid);
if (error || (rn->rn_vdev_guid && rn->rn_vdev_guid != vdev_guid)) {
(void) close(fd);
nvlist_free(config);
return;
}
(void) close(fd);
rn->rn_config = config;
rn->rn_num_labels = num_labels;
/*
* Add additional entries for paths described by this label.
*/
if (rn->rn_labelpaths) {
char *path = NULL;
char *devid = NULL;
rdsk_node_t *slice;
avl_index_t where;
int error;
if (label_paths(rn->rn_hdl, rn->rn_config, &path, &devid))
return;
/*
* Allow devlinks to stabilize so all paths are available.
*/
zpool_label_disk_wait(rn->rn_name, DISK_LABEL_WAIT);
if (path != NULL) {
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
slice->rn_name = zutil_strdup(hdl, path);
slice->rn_vdev_guid = vdev_guid;
slice->rn_avl = rn->rn_avl;
slice->rn_hdl = hdl;
slice->rn_order = IMPORT_ORDER_PREFERRED_1;
slice->rn_labelpaths = B_FALSE;
pthread_mutex_lock(rn->rn_lock);
if (avl_find(rn->rn_avl, slice, &where)) {
pthread_mutex_unlock(rn->rn_lock);
free(slice->rn_name);
free(slice);
} else {
avl_insert(rn->rn_avl, slice, where);
pthread_mutex_unlock(rn->rn_lock);
zpool_open_func(slice);
}
}
if (devid != NULL) {
slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
error = asprintf(&slice->rn_name, "%s%s",
DEV_BYID_PATH, devid);
if (error == -1) {
free(slice);
return;
}
slice->rn_vdev_guid = vdev_guid;
slice->rn_avl = rn->rn_avl;
slice->rn_hdl = hdl;
slice->rn_order = IMPORT_ORDER_PREFERRED_2;
slice->rn_labelpaths = B_FALSE;
pthread_mutex_lock(rn->rn_lock);
if (avl_find(rn->rn_avl, slice, &where)) {
pthread_mutex_unlock(rn->rn_lock);
free(slice->rn_name);
free(slice);
} else {
avl_insert(rn->rn_avl, slice, where);
pthread_mutex_unlock(rn->rn_lock);
zpool_open_func(slice);
}
}
}
}
/*
 * Default directories used when looking for pool devices, listed in order
 * of preference.  The position of an entry in this table is the value that
 * zfs_path_order() reports for a device path starting with that entry, and
 * zpool_default_search_paths() exposes the table to callers.  The number of
 * entries must match DEFAULT_IMPORT_PATH_SIZE.
 */
static char *
zpool_default_import_path[DEFAULT_IMPORT_PATH_SIZE] = {
"/dev/disk/by-vdev", /* Custom rules, use first if they exist */
"/dev/mapper", /* Use multipath devices before components */
"/dev/disk/by-partlabel", /* Single unique entry set by user */
"/dev/disk/by-partuuid", /* Generated partition uuid */
"/dev/disk/by-label", /* Custom persistent labels */
"/dev/disk/by-uuid", /* Single unique entry and persistent */
"/dev/disk/by-id", /* May be multiple entries and persistent */
"/dev/disk/by-path", /* Encodes physical location and persistent */
"/dev" /* UNSAFE device names will change */
};
/*
 * Return the built-in table of default import search directories and store
 * the number of entries in '*count'.  The table is static and must not be
 * freed or modified by the caller.
 */
const char * const *
zpool_default_search_paths(size_t *count)
{
	const char * const *paths =
	    (const char * const *)zpool_default_import_path;

	*count = DEFAULT_IMPORT_PATH_SIZE;

	return (paths);
}
/*
 * Given a full path to a device determine if that device appears in the
 * import search path.  The search path is the colon separated list in the
 * ZPOOL_IMPORT_PATH environment variable when it is set, otherwise the
 * built-in default table.  A directory matches when it is a prefix of
 * 'name'.
 *
 * On the first match its index is stored in '*order' and 0 is returned.
 * Returns ENOENT when nothing matches ('*order' is left untouched) and
 * ENOMEM when the environment value cannot be duplicated.
 */
static int
zfs_path_order(char *name, int *order)
{
	int i, error = ENOENT;
	char *env = getenv("ZPOOL_IMPORT_PATH");

	if (env != NULL) {
		char *envdup, *dir, *state = NULL;

		/* Tokenizing writes into the string, so work on a copy */
		envdup = strdup(env);
		if (envdup == NULL)
			return (ENOMEM);

		/* strtok_r() keeps its state local to this call */
		for (i = 0, dir = strtok_r(envdup, ":", &state); dir != NULL;
		    dir = strtok_r(NULL, ":", &state), i++) {
			if (strncmp(name, dir, strlen(dir)) == 0) {
				*order = i;
				error = 0;
				break;
			}
		}

		free(envdup);
		return (error);
	}

	for (i = 0; i < DEFAULT_IMPORT_PATH_SIZE; i++) {
		const char *dir = zpool_default_import_path[i];

		if (strncmp(name, dir, strlen(dir)) == 0) {
			*order = i;
			error = 0;
			break;
		}
	}

	return (error);
}
/*
 * Use libblkid to quickly enumerate all known zfs devices.
 *
 * Every device blkid reports with TYPE "zfs_member" is added, once, to a
 * newly allocated AVL tree which is handed back through '*slice_cache'.
 * Each node records 'lock', the tree and 'hdl', has label-path expansion
 * enabled, and gets an import order derived from zfs_path_order().
 *
 * Returns 0 on success.  If blkid cannot be set up, the blkid error (or
 * EINVAL when no iterator could be created) is returned and '*slice_cache'
 * is NULL.
 */
int
zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
    avl_tree_t **slice_cache)
{
	blkid_cache cache;
	blkid_dev_iterate iter;
	blkid_dev dev;
	int error;

	*slice_cache = NULL;

	if ((error = blkid_get_cache(&cache, NULL)) != 0)
		return (error);

	if ((error = blkid_probe_all_new(cache)) != 0)
		goto out_cache;

	if ((iter = blkid_dev_iterate_begin(cache)) == NULL) {
		error = EINVAL;
		goto out_cache;
	}

	if ((error = blkid_dev_set_search(iter, "TYPE", "zfs_member")) != 0)
		goto out_iter;

	*slice_cache = zutil_alloc(hdl, sizeof (avl_tree_t));
	avl_create(*slice_cache, slice_cache_compare, sizeof (rdsk_node_t),
	    offsetof(rdsk_node_t, rn_node));

	while (blkid_dev_next(iter, &dev) == 0) {
		rdsk_node_t *slice = zutil_alloc(hdl, sizeof (rdsk_node_t));
		avl_index_t where;

		slice->rn_name = zutil_strdup(hdl, blkid_dev_devname(dev));
		slice->rn_vdev_guid = 0;
		slice->rn_lock = lock;
		slice->rn_avl = *slice_cache;
		slice->rn_hdl = hdl;
		slice->rn_labelpaths = B_TRUE;

		/* Devices outside the search path get the default order */
		if (zfs_path_order(slice->rn_name, &slice->rn_order) == 0)
			slice->rn_order += IMPORT_ORDER_SCAN_OFFSET;
		else
			slice->rn_order = IMPORT_ORDER_DEFAULT;

		pthread_mutex_lock(lock);
		if (avl_find(*slice_cache, slice, &where) != NULL) {
			/* Already known, drop the duplicate */
			free(slice->rn_name);
			free(slice);
		} else {
			avl_insert(*slice_cache, slice, where);
		}
		pthread_mutex_unlock(lock);
	}

	error = 0;

out_iter:
	blkid_dev_iterate_end(iter);
out_cache:
	blkid_put_cache(cache);

	return (error);
}
/*
 * Linux persistent device strings for vdev labels
 *
 * based on libudev for consistency with libudev disk add/remove events
 *
 * encode_device_strings() fills both members: vds_devid from
 * zfs_device_get_devid() and vds_devphys from zfs_device_get_physical().
 */
typedef struct vdev_dev_strs {
/* persistent device id string (describes what) */
char vds_devid[128];
/* persistent physical location string; empty when not available */
char vds_devphys[128];
} vdev_dev_strs_t;
#ifdef HAVE_LIBUDEV
/*
 * Obtain the persistent device id string (describes what)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 *
 * The string is written to 'bufptr' (at most 'buflen' bytes).  Sources are
 * tried in this order:
 *   - devices without an ID_BUS property: "dm-uuid-<DM_UUID>" for device
 *     mapper nodes, then a devlink under ZVOL_ROOT for volumes;
 *   - the "<bus>-..." devlink under /dev/disk/by-id/, with the directory
 *     prefix removed.  Devices without ID_BUS but with an "nvme" parent
 *     are searched with bus "nvme".
 *
 * Returns 0 on success and ENODATA when no identifier was found.
 */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	struct udev_list_entry *entry;
	const char *bus;
	const char *name;
	char devbyid[MAXPATHLEN];

	/* The bus based by-id path is preferred */
	bus = udev_device_get_property_value(dev, "ID_BUS");

	if (bus == NULL) {
		const char *dm_uuid;

		/*
		 * For multipath nodes use the persistent uuid based identifier
		 *
		 * Example: /dev/disk/by-id/dm-uuid-mpath-35000c5006304de3f
		 */
		dm_uuid = udev_device_get_property_value(dev, "DM_UUID");
		if (dm_uuid != NULL) {
			(void) snprintf(bufptr, buflen, "dm-uuid-%s", dm_uuid);
			return (0);
		}

		/*
		 * For volumes use the persistent /dev/zvol/dataset identifier
		 */
		for (entry = udev_device_get_devlinks_list_entry(dev);
		    entry != NULL; entry = udev_list_entry_get_next(entry)) {
			name = udev_list_entry_get_name(entry);
			if (strncmp(name, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
				(void) strlcpy(bufptr, name, buflen);
				return (0);
			}
		}

		/*
		 * NVME 'by-id' symlinks are similar to bus case
		 */
		if (udev_device_get_parent_with_subsystem_devtype(dev,
		    "nvme", NULL) == NULL)
			return (ENODATA);

		bus = "nvme";	/* continue with bus symlink search */
	}

	/*
	 * locate the bus specific by-id link
	 */
	(void) snprintf(devbyid, sizeof (devbyid), "%s%s-", DEV_BYID_PATH, bus);

	for (entry = udev_device_get_devlinks_list_entry(dev);
	    entry != NULL; entry = udev_list_entry_get_next(entry)) {
		name = udev_list_entry_get_name(entry);
		if (strncmp(name, devbyid, strlen(devbyid)) == 0) {
			/* report the link name without its directory */
			(void) strlcpy(bufptr, name + strlen(DEV_BYID_PATH),
			    buflen);
			return (0);
		}
	}

	return (ENODATA);
}
/*
 * Obtain the persistent physical location string (describes where)
 *
 * used by ZED vdev matching for auto-{online,expand,replace}
 *
 * The string is written to 'bufptr' (at most 'buflen' bytes).  Returns 0
 * on success and ENODATA when none of the sources below yields a value.
 */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	/*
	 * Normal disks use ID_PATH for their physical path.
	 *
	 * Device mapper devices are virtual and don't have a physical
	 * path.  For them we use ID_VDEV instead, which is setup via the
	 * /etc/vdev_id.conf file.  ID_VDEV provides a persistent path
	 * to a virtual device.  If you don't have vdev_id.conf setup,
	 * you cannot use multipath autoreplace with device mapper.
	 */
	static const char *const props[] = { "ID_PATH", "ID_VDEV" };
	struct udev_list_entry *entry;
	const char *physpath;
	size_t i;

	for (i = 0; i < sizeof (props) / sizeof (props[0]); i++) {
		physpath = udev_device_get_property_value(dev, props[i]);
		if (physpath != NULL && physpath[0] != '\0') {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
	}

	/*
	 * For ZFS volumes use the persistent /dev/zvol/dataset identifier
	 */
	for (entry = udev_device_get_devlinks_list_entry(dev);
	    entry != NULL; entry = udev_list_entry_get_next(entry)) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
	}

	/*
	 * For all other devices fallback to using the by-uuid name.
	 */
	for (entry = udev_device_get_devlinks_list_entry(dev);
	    entry != NULL; entry = udev_list_entry_get_next(entry)) {
		physpath = udev_list_entry_get_name(entry);
		if (strncmp(physpath, "/dev/disk/by-uuid", 17) == 0) {
			(void) strlcpy(bufptr, physpath, buflen);
			return (0);
		}
	}

	return (ENODATA);
}
/*
 * Decide whether a udev device is a multipath whole disk.  That is the
 * case when:
 *	the DEVNAME property starts with "/dev/dm-",
 *	the ID_PART_TABLE_TYPE property is absent or is not "gpt", and
 *	the DM_UUID property is present.
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname, *type, *uuid;
	boolean_t is_dm_node, has_gpt;

	devname = udev_device_get_property_value(dev, "DEVNAME");
	type = udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	uuid = udev_device_get_property_value(dev, "DM_UUID");

	is_dm_node = (devname != NULL &&
	    strncmp(devname, "/dev/dm-", 8) == 0) ? B_TRUE : B_FALSE;
	has_gpt = (type != NULL && strcmp(type, "gpt") == 0) ?
	    B_TRUE : B_FALSE;

	if (is_dm_node && !has_gpt && uuid != NULL)
		return (B_TRUE);

	return (B_FALSE);
}
/*
 * Return non-zero once udev has finished setting up 'dev'.  When libudev
 * provides udev_device_get_is_initialized() its answer is used directly;
 * otherwise the presence of the DEVLINKS property is taken as the signal.
 */
static int
udev_device_is_ready(struct udev_device *dev)
{
#ifdef HAVE_LIBUDEV_UDEV_DEVICE_GET_IS_INITIALIZED
	return (udev_device_get_is_initialized(dev));
#else
	/* wait for DEVLINKS property to be initialized */
	const char *links = udev_device_get_property_value(dev, "DEVLINKS");

	return (links != NULL);
#endif
}
#else
/*
 * Built without libudev: no persistent device id can be determined, so
 * ENODATA is always returned and 'bufptr' is left untouched.
 */
/* ARGSUSED */
int
zfs_device_get_devid(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev;
	(void) bufptr;
	(void) buflen;

	return (ENODATA);
}
/*
 * Built without libudev: no persistent physical location can be
 * determined, so ENODATA is always returned and 'bufptr' is left untouched.
 */
/* ARGSUSED */
int
zfs_device_get_physical(struct udev_device *dev, char *bufptr, size_t buflen)
{
	(void) dev;
	(void) bufptr;
	(void) buflen;

	return (ENODATA);
}
#endif /* HAVE_LIBUDEV */
/*
 * Wait up to timeout_ms for udev to set up the device node. The device is
 * considered ready when libudev determines it has been initialized, all of
 * the device links have been verified to exist, and it has been allowed to
 * settle. At this point the device can be accessed reliably.
 * Depending on the complexity of the udev rules this process could take
 * several seconds.
 *
 * With libudev: returns ENXIO if no udev context can be created, otherwise
 * the value of 'ret' when the loop ends (it starts as ENODEV, becomes 0 when
 * the device is ready and goes back to ENODEV if a device link is missing).
 *
 * Without libudev: 'path' is polled with stat64(). Returns 0 once the path
 * has been seen and a later poll finds it still present at least settle_ms
 * afterwards, the stat64() errno for any failure other than ENOENT, and
 * ENODEV on timeout.
 */
int
zpool_label_disk_wait(const char *path, int timeout_ms)
{
#ifdef HAVE_LIBUDEV
struct udev *udev;
struct udev_device *dev = NULL;
char nodepath[MAXPATHLEN];
char *sysname = NULL;
int ret = ENODEV;
/* how long the links must stay present, and the polling interval */
int settle_ms = 50;
long sleep_ms = 10;
hrtime_t start, settle;
if ((udev = udev_new()) == NULL)
return (ENXIO);
start = gethrtime();
/* settle == 0 means the settle timer is not running */
settle = 0;
do {
/* Resolve the path only once; retry until it can be resolved */
if (sysname == NULL) {
if (realpath(path, nodepath) != NULL) {
sysname = strrchr(nodepath, '/') + 1;
} else {
/* presumably MILLISEC converts ms to the usec usleep() expects - confirm */
(void) usleep(sleep_ms * MILLISEC);
continue;
}
}
dev = udev_device_new_from_subsystem_sysname(udev,
"block", sysname);
if ((dev != NULL) && udev_device_is_ready(dev)) {
struct udev_list_entry *links, *link = NULL;
ret = 0;
links = udev_device_get_devlinks_list_entry(dev);
/* Every device link must exist; otherwise restart the settle timer */
udev_list_entry_foreach(link, links) {
struct stat64 statbuf;
const char *name;
name = udev_list_entry_get_name(link);
errno = 0;
if (stat64(name, &statbuf) == 0 && errno == 0)
continue;
settle = 0;
ret = ENODEV;
break;
}
if (ret == 0) {
/* Start the settle timer, or finish once it has run settle_ms */
if (settle == 0) {
settle = gethrtime();
} else if (NSEC2MSEC(gethrtime() - settle) >=
settle_ms) {
udev_device_unref(dev);
break;
}
}
}
/* dev may be NULL here when the lookup above failed */
udev_device_unref(dev);
(void) usleep(sleep_ms * MILLISEC);
} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
udev_unref(udev);
return (ret);
#else
int settle_ms = 50;
long sleep_ms = 10;
hrtime_t start, settle;
struct stat64 statbuf;
start = gethrtime();
/* settle == 0 means the path has not been seen yet */
settle = 0;
do {
errno = 0;
if ((stat64(path, &statbuf) == 0) && (errno == 0)) {
if (settle == 0)
settle = gethrtime();
else if (NSEC2MSEC(gethrtime() - settle) >= settle_ms)
return (0);
} else if (errno != ENOENT) {
/* Only a missing path is worth waiting for */
return (errno);
}
usleep(sleep_ms * MILLISEC);
} while (NSEC2MSEC(gethrtime() - start) < timeout_ms);
return (ENODEV);
#endif /* HAVE_LIBUDEV */
}
/*
 * Encode the persistent devices strings
 * used for the vdev disk label
 *
 * path:      device path; resolved with realpath() to the device node
 * ds:        receives the devid and, when available, the physical path
 *            (vds_devphys is set to the empty string otherwise)
 * wholedisk: B_TRUE when the vdev is a whole disk; other devices are only
 *            processed when udev reports them as multipath whole disks
 *
 * Returns 0 on success, ENXIO if no udev context can be created, ENODEV if
 * the device cannot be resolved, does not become ready in time or does not
 * qualify, or the error from zfs_device_get_devid().  Without libudev
 * ENOENT is always returned.
 */
static int
encode_device_strings(const char *path, vdev_dev_strs_t *ds,
    boolean_t wholedisk)
{
#ifdef HAVE_LIBUDEV
	struct udev *udev;
	struct udev_device *dev = NULL;
	char nodepath[MAXPATHLEN];
	char *sysname;
	int ret = ENODEV;
	hrtime_t start;

	if ((udev = udev_new()) == NULL)
		return (ENXIO);

	/* resolve path to a runtime device node instance */
	if (realpath(path, nodepath) == NULL)
		goto no_dev;

	/* realpath() yields an absolute path, so a '/' is always present */
	sysname = strrchr(nodepath, '/') + 1;

	/*
	 * Wait up to 3 seconds for udev to set up the device node context
	 * (3 * MILLISEC milliseconds; assumes MILLISEC is 1000 - confirm)
	 */
	start = gethrtime();
	do {
		dev = udev_device_new_from_subsystem_sysname(udev, "block",
		    sysname);
		if (dev == NULL)
			goto no_dev;
		if (udev_device_is_ready(dev))
			break;	/* udev ready */

		udev_device_unref(dev);
		dev = NULL;

		if (NSEC2MSEC(gethrtime() - start) < 10)
			(void) sched_yield();	/* yield/busy wait up to 10ms */
		else
			(void) usleep(10 * MILLISEC);

	} while (NSEC2MSEC(gethrtime() - start) < (3 * MILLISEC));

	if (dev == NULL)
		goto no_dev;

	/*
	 * Only whole disks require extra device strings.  The device
	 * reference is still held here, so leave through no_dev_ref to
	 * release it; 'ret' is still ENODEV.
	 */
	if (!wholedisk && !udev_mpath_whole_disk(dev))
		goto no_dev_ref;

	ret = zfs_device_get_devid(dev, ds->vds_devid, sizeof (ds->vds_devid));
	if (ret != 0)
		goto no_dev_ref;

	/* physical location string (optional) */
	if (zfs_device_get_physical(dev, ds->vds_devphys,
	    sizeof (ds->vds_devphys)) != 0) {
		ds->vds_devphys[0] = '\0'; /* empty string --> not available */
	}

no_dev_ref:
	udev_device_unref(dev);
no_dev:
	udev_unref(udev);

	return (ret);
#else
	return (ENOENT);
#endif
}
/*
 * Refresh the persistent device strings of a leaf vdev config
 *
 * - only acts on vdevs whose type is VDEV_TYPE_DISK and that carry a path
 * - run during pool create|add|attach|import
 * - the strings are used to match devices for auto-{online,expand,replace}
 * - they live in the leaf disk config label (next to the 'path' NVP)
 * - the kernel currently does not use them (i.e. for vdev_disk_open)
 *
 * single device node example:
 *	devid:		'scsi-MG03SCA300_350000494a8cb3d67-part1'
 *	phys_path:	'pci-0000:04:00.0-sas-0x50000394a8cb3d67-lun-0'
 *
 * multipath device node example:
 *	devid:		'dm-uuid-mpath-35000c5006304de3f'
 *
 * When the disk sits in an enclosure its sysfs path is recorded as well,
 * so that enclosure LEDs can be driven:
 *	vdev_enc_sysfs_path:	'/sys/class/enclosure/11:0:1:0/SLOT 4'
 *
 * If the strings cannot be generated, any existing devid, phys_path and
 * enclosure entries are removed from 'nv'.
 */
void
update_vdev_config_dev_strs(nvlist_t *nv)
{
	vdev_dev_strs_t vds;
	char *optout, *vdev_type, *vdev_path;
	char *underlying, *enc_path;
	uint64_t wholedisk = 0;

	/*
	 * Legacy ZFS implementations may opt out of devid strings in the
	 * vdev label, e.g.:
	 *
	 *	env ZFS_VDEV_DEVID_OPT_OUT=YES zpool import dozer
	 *
	 * Older ZFS on Linux releases could not display VDEV names when the
	 * pool config carried a "devid" NVP; a pool created on illumos has
	 * such a value and made "zpool status" fail.  With the variable set
	 * (a positive number, or a value starting with "YES" or "ON") the
	 * devid values are stripped on import and not added on
	 * zpool create|add.
	 */
	optout = getenv("ZFS_VDEV_DEVID_OPT_OUT");
	if (optout != NULL && (strtoul(optout, NULL, 0) > 0 ||
	    strncasecmp(optout, "YES", 3) == 0 ||
	    strncasecmp(optout, "ON", 2) == 0)) {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		return;
	}

	/* Nothing to do unless this is a disk vdev with a path */
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &vdev_type) != 0)
		return;
	if (strcmp(vdev_type, VDEV_TYPE_DISK) != 0)
		return;
	if (nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &vdev_path) != 0)
		return;

	/* A missing whole-disk entry leaves the default of 0 in place */
	(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK, &wholedisk);

	if (encode_device_strings(vdev_path, &vds,
	    (boolean_t)wholedisk) != 0) {
		/* No fresh strings available: drop whatever was stored */
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_DEVID);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_PHYS_PATH);
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
		return;
	}

	/* Store the freshly encoded strings in the config nvlist */
	(void) nvlist_add_string(nv, ZPOOL_CONFIG_DEVID, vds.vds_devid);
	if (vds.vds_devphys[0] != '\0') {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_PHYS_PATH,
		    vds.vds_devphys);
	}

	/* Record the enclosure sysfs path, or clear it when there is none */
	underlying = zfs_get_underlying_path(vdev_path);
	enc_path = zfs_get_enclosure_sysfs_path(underlying);
	if (enc_path != NULL) {
		(void) nvlist_add_string(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH,
		    enc_path);
	} else {
		(void) nvlist_remove_all(nv, ZPOOL_CONFIG_VDEV_ENC_SYSFS_PATH);
	}

	free(enc_path);
	free(underlying);
}

View File

@ -23,54 +23,13 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <ctype.h>
#include <errno.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/efi_partition.h>
#include <libzutil.h>
#ifdef HAVE_LIBUDEV
#include <libudev.h>
#endif
/*
 * Append a partition suffix to an otherwise fully qualified device path.
 * This generates the full path as it is stored in ZPOOL_CONFIG_PATH for
 * whole disk devices.
 *
 * Paths under UDISK_ROOT or ZVOL_ROOT get "-part1"; any other path gets
 * "p1" when it ends in a digit and "1" otherwise.
 *
 * 'path' must be a NUL-terminated string in a buffer of 'max_len' bytes.
 * On success the new length of 'path' is returned.  A negative value is
 * returned, and 'path' is left untouched, when 'path' is empty or the
 * buffer is too small for the suffix.
 */
int
zfs_append_partition(char *path, size_t max_len)
{
	size_t len = strlen(path);

	/* An empty path has no last character to inspect below */
	if (len == 0)
		return (-1);

	if ((strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0) ||
	    (strncmp(path, ZVOL_ROOT, strlen(ZVOL_ROOT)) == 0)) {
		if (len + 6 >= max_len)
			return (-1);

		(void) strcat(path, "-part1");
		len += 6;
	} else {
		if (len + 2 >= max_len)
			return (-1);

		/* <ctype.h> functions require an unsigned char value */
		if (isdigit((unsigned char)path[len - 1])) {
			(void) strcat(path, "p1");
			len += 2;
		} else {
			(void) strcat(path, "1");
			len += 1;
		}
	}

	return ((int)len);
}
/*
* Given a shorthand device name check if a file by that name exists in any
@ -213,413 +172,3 @@ zfs_strcmp_pathname(const char *name, const char *cmp, int wholedisk)
return (0);
}
/*
 * Allocate and return the underlying device name for a device mapper device.
 * If a device mapper device maps to multiple devices, return the first device.
 *
 * For example, dm_name = "/dev/dm-0" could return "/dev/sda".  Symlinks to a
 * DM device (like /dev/disk/by-vdev/A0) are also allowed.
 *
 * Returns device name, or NULL on error or no match.  If dm_name is not a DM
 * device then return NULL.
 *
 * NOTE: The returned name string must be *freed*.
 */
static char *
dm_get_underlying_path(const char *dm_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char *realp;
	char *slaves_dir = NULL;
	char *path = NULL;
	const char *dev_str;

	if (dm_name == NULL)
		return (NULL);

	/* dm name may be a symlink (like /dev/disk/by-vdev/A0) */
	realp = realpath(dm_name, NULL);
	if (realp == NULL)
		return (NULL);

	/*
	 * If they preface 'dev' with a path (like "/dev") then strip it off.
	 * We just want the 'dm-N' part.  Without any '/' the whole string is
	 * already the device name.
	 */
	dev_str = strrchr(realp, '/');
	dev_str = (dev_str != NULL) ? dev_str + 1 : realp;

	/* On failure asprintf() leaves its output pointer undefined */
	if (asprintf(&slaves_dir, "/sys/block/%s/slaves/", dev_str) == -1) {
		slaves_dir = NULL;
		goto end;
	}

	dp = opendir(slaves_dir);
	if (dp == NULL)
		goto end;

	/* Return the first non-directory entry in /sys/block/dm-N/slaves/ */
	while ((ep = readdir(dp)) != NULL) {
		if (ep->d_type != DT_DIR) {	/* skip "." and ".." dirs */
			if (asprintf(&path, "/dev/%s", ep->d_name) == -1)
				path = NULL;
			break;
		}
	}

end:
	if (dp != NULL)
		closedir(dp);
	free(slaves_dir);
	free(realp);
	return (path);
}
/*
 * Report whether 'dev_name' is a device mapper or multipath device.
 *
 * The device counts as one when dm_get_underlying_path() can name an
 * underlying device for it.  Returns 1 in that case and 0 otherwise.
 */
int
zfs_dev_is_dm(const char *dev_name)
{
	char *underlying = dm_get_underlying_path(dev_name);
	int is_dm = (underlying != NULL);

	/* Only the existence of a result matters, not its value */
	free(underlying);
	return (is_dm);
}
/*
 * A "whole disk" is an entire physical disk (something that can be
 * labeled, have its write cache toggled, etc.) rather than the full
 * capacity of a pseudo-device such as lofi or did.  The check behaves as
 * though the disk were about to be labeled, which is a reasonable test of
 * whether the device is viable.
 *
 * Returns 1 when the device can be opened and an EFI label structure can
 * be set up for it, 0 otherwise.
 */
int
zfs_dev_is_whole_disk(const char *dev_name)
{
	struct dk_gpt *gpt;
	int whole = 0;
	int fd = open(dev_name, O_RDONLY | O_DIRECT);

	if (fd < 0)
		return (0);

	/* The label itself is not needed, only whether it could be built */
	if (efi_alloc_and_init(fd, EFI_NUMPAR, &gpt) == 0) {
		efi_free(gpt);
		whole = 1;
	}

	(void) close(fd);
	return (whole);
}
/*
 * Find the underlying device behind a device name
 *
 * A name is frequently a symlink to a device, a partition device, or a
 * multipath device.  This function resolves such a name to the underlying
 * device.  A name that already is the underlying device is returned as is,
 * and for a DM device backed by several devices the first one is used.
 *
 * Examples:
 *
 * 1. /dev/disk/by-id/ata-QEMU_HARDDISK_QM00001 -> ../../sda
 *	dev_name:	/dev/disk/by-id/ata-QEMU_HARDDISK_QM00001
 *	returns:	/dev/sda
 *
 * 2. /dev/mapper/mpatha (made up of /dev/sda and /dev/sdb)
 *	dev_name:	/dev/mapper/mpatha
 *	returns:	/dev/sda (first device)
 *
 * 3. /dev/sda (already the underlying device)
 *	dev_name:	/dev/sda
 *	returns:	/dev/sda
 *
 * 4. /dev/dm-3 (mapped to /dev/sda)
 *	dev_name:	/dev/dm-3
 *	returns:	/dev/sda
 *
 * 5. /dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9 -> ../../sdb9
 *	dev_name:	/dev/disk/by-id/scsi-0QEMU_drive-scsi0-0-0-0-part9
 *	returns:	/dev/sdb
 *
 * 6. /dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a -> ../dev/sda2
 *	dev_name:	/dev/disk/by-uuid/5df030cf-3cd9-46e4-8e99-3ccb462a4e9a
 *	returns:	/dev/sda
 *
 * Returns the underlying device name, or NULL on error or no match.
 *
 * NOTE: The caller must free the returned string.
 */
char *
zfs_get_underlying_path(const char *dev_name)
{
	char *resolved;
	char *stripped;

	if (dev_name == NULL)
		return (NULL);

	/* Try the device mapper lookup first ... */
	resolved = dm_get_underlying_path(dev_name);

	/* ... and for a non-DM device simply resolve any symlinks */
	if (resolved == NULL)
		resolved = realpath(dev_name, NULL);

	if (resolved == NULL)
		return (NULL);

	/* Drop a trailing partition suffix to obtain the disk itself */
	stripped = zfs_strip_partition_path(resolved);
	free(resolved);

	return (stripped);
}
/*
 * Given a dev name like "sda", return the full enclosure sysfs path to
 * the disk.  You can also pass in the name with "/dev" prepended
 * to it (like /dev/sda).
 *
 * For example, disk "sda" in enclosure slot 1:
 *     dev:            "sda"
 *     returns:        "/sys/class/enclosure/1:0:3:0/Slot 1"
 *
 * 'dev' must be a non-devicemapper device.
 *
 * Returns NULL when 'dev_name' is NULL, when the device's sysfs directory
 * cannot be opened, or when no usable enclosure link is found.
 *
 * Returned string must be freed.
 */
char *
zfs_get_enclosure_sysfs_path(const char *dev_name)
{
	DIR *dp = NULL;
	struct dirent *ep;
	char buf[MAXPATHLEN];
	char *devdir = NULL;
	char *link = NULL;
	char *encl;
	char *path = NULL;
	const char *slash;
	ssize_t len;

	if (dev_name == NULL)
		return (NULL);

	/* If they preface 'dev' with a path (like "/dev") then strip it off */
	slash = strrchr(dev_name, '/');
	if (slash != NULL)
		dev_name = slash + 1;	/* +1 since we want the chr after '/' */

	/* On failure asprintf() leaves its output pointer undefined */
	if (asprintf(&devdir, "/sys/block/%s/device", dev_name) == -1) {
		devdir = NULL;
		goto end;
	}

	/* 'devdir' stays owned here and is released at 'end' */
	dp = opendir(devdir);
	if (dp == NULL)
		goto end;

	/*
	 * Look through all sysfs entries in /sys/block/<dev>/device for
	 * the enclosure symlink.
	 */
	while ((ep = readdir(dp)) != NULL) {
		/* Ignore everything that's not our enclosure_device link */
		if (strstr(ep->d_name, "enclosure_device") == NULL)
			continue;

		/* Release the link name built for a previous match */
		free(link);
		link = NULL;

		if (asprintf(&link, "%s/%s", devdir, ep->d_name) == -1) {
			link = NULL;
			break;
		}

		len = readlink(link, buf, sizeof (buf));

		/* Did readlink fail or crop the link name? */
		if (len == -1 || (size_t)len >= sizeof (buf))
			break;

		/*
		 * We got a valid link.  readlink() doesn't terminate strings
		 * so we have to do it.
		 */
		buf[len] = '\0';

		/*
		 * Our link will look like:
		 *
		 * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1"
		 *
		 * We want to grab the "enclosure/1:0:3:0/SLOT 1" part
		 */
		encl = strstr(buf, "enclosure");
		if (encl == NULL)
			break;

		/* A later match replaces the result of an earlier one */
		free(path);
		if (asprintf(&path, "/sys/class/%s", encl) == -1) {
			/* If asprintf() fails, 'path' is undefined */
			path = NULL;
			break;
		}
	}

end:
	free(link);
	free(devdir);

	if (dp != NULL)
		closedir(dp);

	return (path);
}
/*
 * Remove partition suffix from a vdev path.  Partition suffixes may take three
 * forms: "-partX", "pX", or "X", where X is a string of digits.  The second
 * case only occurs when the suffix is preceded by a digit, i.e. "md0p0".  The
 * third case only occurs when preceded by a string matching the regular
 * expression "^([hsv]|xv)d[a-z]+", i.e. a scsi, ide, virtio or xen disk.
 *
 * 'path' itself is not modified; a copy is returned with the suffix removed,
 * or unchanged when no suffix is recognized.  Returns NULL if the copy
 * cannot be allocated.
 *
 * caller must free the returned string
 */
char *
zfs_strip_partition(char *path)
{
	char *tmp = strdup(path);
	char *part = NULL, *d = NULL;

	if (tmp == NULL)
		return (NULL);

	/*
	 * Locate where a suffix would begin ('part') and where its digits
	 * would begin ('d').  <ctype.h> functions require an unsigned char
	 * value, hence the casts.
	 */
	if ((part = strstr(tmp, "-part")) != NULL && part != tmp) {
		d = part + 5;
	} else if ((part = strrchr(tmp, 'p')) != NULL &&
	    part > tmp + 1 && isdigit((unsigned char)*(part - 1))) {
		d = part + 1;
	} else if ((tmp[0] == 'h' || tmp[0] == 's' || tmp[0] == 'v') &&
	    tmp[1] == 'd') {
		for (d = &tmp[2]; isalpha((unsigned char)*d); part = ++d) { }
	} else if (strncmp("xvd", tmp, 3) == 0) {
		for (d = &tmp[3]; isalpha((unsigned char)*d); part = ++d) { }
	}

	/* Only cut when everything from 'd' to the end is digits */
	if (part != NULL && d != NULL && *d != '\0') {
		for (; isdigit((unsigned char)*d); d++) { }
		if (*d == '\0')
			*part = '\0';
	}
	return (tmp);
}
/*
 * Same as zfs_strip_partition, but allows "/dev/" to be in the pathname
 *
 * path:	/dev/sda1
 * returns:	/dev/sda
 *
 * A name without any '/' is handled as a bare device name.  Returns NULL
 * if memory cannot be allocated.
 *
 * Returned string must be freed.
 */
char *
zfs_strip_partition_path(char *path)
{
	char *newpath = strdup(path);
	char *slash;
	char *sd_offset;
	char *new_sd;

	if (newpath == NULL)
		return (NULL);

	/*
	 * Point to "sda1" part of "/dev/sda1".  With no directory component
	 * the device name starts at the beginning of the string.
	 */
	slash = strrchr(newpath, '/');
	sd_offset = (slash != NULL) ? slash + 1 : newpath;

	/* Get our new name "sda" */
	new_sd = zfs_strip_partition(sd_offset);
	if (new_sd == NULL) {
		free(newpath);
		return (NULL);
	}

	/* Paste the "sda" where "sda1" was */
	strlcpy(sd_offset, new_sd, strlen(sd_offset) + 1);

	/* Free temporary "sda" */
	free(new_sd);

	return (newpath);
}
#ifdef HAVE_LIBUDEV
/*
 * Decide from udev properties whether 'dev' is a multipath whole disk.
 *
 * All of the following must hold:
 *	DEVNAME starts with "/dev/dm-"
 *	DM_UUID is present
 *	ID_PART_TABLE_TYPE is absent or is something other than "gpt"
 *
 * The DM_NAME property is not consulted.
 */
static boolean_t
udev_mpath_whole_disk(struct udev_device *dev)
{
	const char *devname =
	    udev_device_get_property_value(dev, "DEVNAME");
	const char *type =
	    udev_device_get_property_value(dev, "ID_PART_TABLE_TYPE");
	const char *uuid =
	    udev_device_get_property_value(dev, "DM_UUID");

	/* Must be a device mapper node */
	if (devname == NULL || strncmp(devname, "/dev/dm-", 8) != 0)
		return (B_FALSE);

	/* A GPT partition table rules the device out */
	if (type != NULL && strcmp(type, "gpt") == 0)
		return (B_FALSE);

	/* Must carry a device mapper UUID */
	if (uuid == NULL)
		return (B_FALSE);

	return (B_TRUE);
}
/*
 * Check if a disk is effectively a multipath whole disk
 *
 * 'path' is resolved to its device node; only nodes named "dm-*" are
 * looked up in udev and tested with udev_mpath_whole_disk().  Returns
 * B_FALSE when the path cannot be resolved, is not a dm node, or udev
 * cannot provide the device.
 */
boolean_t
is_mpath_whole_disk(const char *path)
{
	struct udev *udev;
	struct udev_device *dev;
	char nodepath[MAXPATHLEN];
	char *sysname;
	boolean_t wholedisk = B_FALSE;

	if (realpath(path, nodepath) == NULL)
		return (B_FALSE);

	/* realpath() yields an absolute path, so a '/' is always present */
	sysname = strrchr(nodepath, '/') + 1;
	if (strncmp(sysname, "dm-", 3) != 0)
		return (B_FALSE);

	if ((udev = udev_new()) == NULL)
		return (B_FALSE);

	dev = udev_device_new_from_subsystem_sysname(udev, "block", sysname);
	if (dev != NULL) {
		wholedisk = udev_mpath_whole_disk(dev);
		udev_device_unref(dev);
	}

	/* Release the udev context on every path once the device is gone */
	udev_unref(udev);
	return (wholedisk);
}
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,76 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2015 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
* Copyright 2015 RackTop Systems.
* Copyright (c) 2016, Intel Corporation.
*/
#ifndef _LIBZUTIL_ZUTIL_IMPORT_H_
#define _LIBZUTIL_ZUTIL_IMPORT_H_
/*
 * Human-readable descriptions of error conditions.
 * NOTE(review): presumably reported through the libpc handle during pool
 * import -- confirm against the users of these macros.
 */
#define EZFS_BADCACHE "invalid or missing cache file"
#define EZFS_BADPATH "must be an absolute path"
#define EZFS_NOMEM "out of memory"
#define EZFS_EACESS "some devices require root privileges"
/*
 * Ordering values for candidate devices.
 * NOTE(review): presumably stored in rdsk_node_t.rn_order, which is
 * documented as "low to high" preference -- confirm against the callers.
 */
#define IMPORT_ORDER_PREFERRED_1 1
#define IMPORT_ORDER_PREFERRED_2 2
#define IMPORT_ORDER_SCAN_OFFSET 10
#define IMPORT_ORDER_DEFAULT 100
/*
 * Handle passed to the import helpers declared in this header.
 * NOTE(review): field meanings below are inferred from their names only;
 * confirm against the code that fills them in.
 */
typedef struct libpc_handle {
boolean_t lpc_printerr; /* presumably: print errors when set */
boolean_t lpc_open_access_error; /* presumably: a device open was denied */
boolean_t lpc_desc_active; /* presumably: lpc_desc holds a message */
char lpc_desc[1024]; /* description text buffer */
const pool_config_ops_t *lpc_ops; /* pool config operations table */
void *lpc_lib_handle; /* opaque pointer; owner not visible here */
} libpc_handle_t;
/*
 * Helpers that operate on a libpc handle.
 * NOTE(review): implementations are not visible from this header; the
 * summaries below are inferred from the signatures -- confirm.
 */
/* presumably: yields the path and devid strings stored in a label */
int label_paths(libpc_handle_t *hdl, nvlist_t *label, char **path,
char **devid);
/* presumably: fills *slice_cache with import candidates found via blkid */
int zpool_find_import_blkid(libpc_handle_t *hdl, pthread_mutex_t *lock,
avl_tree_t **slice_cache);
/* presumably: allocation wrappers reporting failure through 'hdl' */
void * zutil_alloc(libpc_handle_t *hdl, size_t size);
char *zutil_strdup(libpc_handle_t *hdl, const char *str);
/*
 * One candidate device, linkable into an AVL tree through rn_node.
 * NOTE(review): comments on rn_hdl, rn_avl, rn_node, rn_lock and
 * rn_labelpaths are inferred from names and types -- confirm.
 */
typedef struct rdsk_node {
char *rn_name; /* Full path to device */
int rn_order; /* Preferred order (low to high) */
int rn_num_labels; /* Number of valid labels */
uint64_t rn_vdev_guid; /* Expected vdev guid when set */
libpc_handle_t *rn_hdl; /* handle this node is associated with */
nvlist_t *rn_config; /* Label config */
avl_tree_t *rn_avl; /* presumably: tree this node belongs to */
avl_node_t rn_node; /* AVL linkage */
pthread_mutex_t *rn_lock; /* presumably: guards the shared tree */
boolean_t rn_labelpaths; /* presumably: also consider label paths */
} rdsk_node_t;
/*
 * NOTE(review): presumably the comparator for the slice cache AVL tree of
 * rdsk_node_t entries -- confirm against the tree's creation site.
 */
int slice_cache_compare(const void *, const void *);
/*
 * NOTE(review): takes an untyped argument, presumably an rdsk_node_t
 * handed to a worker -- confirm against the callers.
 */
void zpool_open_func(void *);
#endif /* _LIBZUTIL_ZUTIL_IMPORT_H_ */