OpenZFS restructuring - zpool
Factor Linux specific functions out of the zpool command. Reviewed-by: Allan Jude <allanjude@freebsd.org> Reviewed-by: Ryan Moeller <ryan@ixsystems.com> Reviewed-by: Sean Eric Fagan <sef@ixsystems.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Reviewed-by: loli10K <ezomori.nozomu@gmail.com> Signed-off-by: Matt Macy <mmacy@FreeBSD.org> Closes #9333
This commit is contained in:
parent
7bb0c29468
commit
3283f137d7
@ -2,7 +2,8 @@ include $(top_srcdir)/config/Rules.am
|
||||
|
||||
DEFAULT_INCLUDES += \
|
||||
-I$(top_srcdir)/include \
|
||||
-I$(top_srcdir)/lib/libspl/include
|
||||
-I$(top_srcdir)/lib/libspl/include \
|
||||
-I.
|
||||
|
||||
sbin_PROGRAMS = zpool
|
||||
|
||||
@ -13,6 +14,10 @@ zpool_SOURCES = \
|
||||
zpool_util.h \
|
||||
zpool_vdev.c
|
||||
|
||||
if BUILD_LINUX
|
||||
zpool_SOURCES += os/linux/zpool_vdev_os.c
|
||||
endif
|
||||
|
||||
zpool_LDADD = \
|
||||
$(top_builddir)/lib/libnvpair/libnvpair.la \
|
||||
$(top_builddir)/lib/libuutil/libuutil.la \
|
||||
|
411
cmd/zpool/os/linux/zpool_vdev_os.c
Normal file
411
cmd/zpool/os/linux/zpool_vdev_os.c
Normal file
@ -0,0 +1,411 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2016, 2017 Intel Corporation.
|
||||
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Functions to convert between a list of vdevs and an nvlist representing the
|
||||
* configuration. Each entry in the list can be one of:
|
||||
*
|
||||
* Device vdevs
|
||||
* disk=(path=..., devid=...)
|
||||
* file=(path=...)
|
||||
*
|
||||
* Group vdevs
|
||||
* raidz[1|2]=(...)
|
||||
* mirror=(...)
|
||||
*
|
||||
* Hot spares
|
||||
*
|
||||
* While the underlying implementation supports it, group vdevs cannot contain
|
||||
* other group vdevs. All userland verification of devices is contained within
|
||||
* this file. If successful, the nvlist returned can be passed directly to the
|
||||
* kernel; we've done as much verification as possible in userland.
|
||||
*
|
||||
* Hot spares are a special case, and passed down as an array of disk vdevs, at
|
||||
* the same level as the root of the vdev tree.
|
||||
*
|
||||
* The only function exported by this file is 'make_root_vdev'. The
|
||||
* function performs several passes:
|
||||
*
|
||||
* 1. Construct the vdev specification. Performs syntax validation and
|
||||
* makes sure each device is valid.
|
||||
* 2. Check for devices in use. Using libblkid to make sure that no
|
||||
* devices are also in use. Some can be overridden using the 'force'
|
||||
* flag, others cannot.
|
||||
* 3. Check for replication errors if the 'force' flag is not specified.
|
||||
* Validates that the replication level is consistent across the
|
||||
* entire pool.
|
||||
* 4. Call libzfs to label any whole disks with an EFI label.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
#include <devid.h>
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
#include <libintl.h>
|
||||
#include <libnvpair.h>
|
||||
#include <libzutil.h>
|
||||
#include <limits.h>
|
||||
#include <sys/spa.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include "zpool_util.h"
|
||||
#include <sys/zfs_context.h>
|
||||
|
||||
#include <scsi/scsi.h>
|
||||
#include <scsi/sg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/efi_partition.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/vtoc.h>
|
||||
#include <sys/mntent.h>
|
||||
#include <uuid/uuid.h>
|
||||
#include <blkid/blkid.h>
|
||||
|
||||
/*
 * One entry of the sector size override table: the identification string
 * a device reports and the sector size to assume for that device.
 */
typedef struct vdev_disk_db_entry
{
	char id[24];		/* vendor + product id, no NUL terminator */
	int sector_size;	/* sector size to use for this device */
} vdev_disk_db_entry_t;

/*
 * Database of block devices that lie about physical sector sizes.
 *
 * Each identification string must be precisely 24 characters to avoid
 * false negatives: check_sector_size_database() compares all 24 bytes of
 * id[] with memcmp() against bytes 8-31 of the device's INQUIRY reply.
 * In the standard INQUIRY data format those bytes hold the 8 character
 * vendor identification followed by the 16 character product
 * identification, each left aligned and padded with spaces.  The
 * literals below are therefore space padded to fill id[] completely and
 * intentionally leave no room for a terminating NUL.
 */
static const vdev_disk_db_entry_t vdev_disk_database[] = {
	{"ATA     ADATA SSD S396 3", 8192},
	{"ATA     APPLE SSD SM128E", 8192},
	{"ATA     APPLE SSD SM256E", 8192},
	{"ATA     APPLE SSD SM512E", 8192},
	{"ATA     APPLE SSD SM768E", 8192},
	{"ATA     C400-MTFDDAC064M", 8192},
	{"ATA     C400-MTFDDAC128M", 8192},
	{"ATA     C400-MTFDDAC256M", 8192},
	{"ATA     C400-MTFDDAC512M", 8192},
	{"ATA     Corsair Force 3 ", 8192},
	{"ATA     Corsair Force GS", 8192},
	{"ATA     INTEL SSDSA2CT04", 8192},
	{"ATA     INTEL SSDSA2BZ10", 8192},
	{"ATA     INTEL SSDSA2BZ20", 8192},
	{"ATA     INTEL SSDSA2BZ30", 8192},
	{"ATA     INTEL SSDSA2CW04", 8192},
	{"ATA     INTEL SSDSA2CW08", 8192},
	{"ATA     INTEL SSDSA2CW12", 8192},
	{"ATA     INTEL SSDSA2CW16", 8192},
	{"ATA     INTEL SSDSA2CW30", 8192},
	{"ATA     INTEL SSDSA2CW60", 8192},
	{"ATA     INTEL SSDSC2CT06", 8192},
	{"ATA     INTEL SSDSC2CT12", 8192},
	{"ATA     INTEL SSDSC2CT18", 8192},
	{"ATA     INTEL SSDSC2CT24", 8192},
	{"ATA     INTEL SSDSC2CW06", 8192},
	{"ATA     INTEL SSDSC2CW12", 8192},
	{"ATA     INTEL SSDSC2CW18", 8192},
	{"ATA     INTEL SSDSC2CW24", 8192},
	{"ATA     INTEL SSDSC2CW48", 8192},
	{"ATA     KINGSTON SH100S3", 8192},
	{"ATA     KINGSTON SH103S3", 8192},
	{"ATA     M4-CT064M4SSD2  ", 8192},
	{"ATA     M4-CT128M4SSD2  ", 8192},
	{"ATA     M4-CT256M4SSD2  ", 8192},
	{"ATA     M4-CT512M4SSD2  ", 8192},
	{"ATA     OCZ-AGILITY2    ", 8192},
	{"ATA     OCZ-AGILITY3    ", 8192},
	{"ATA     OCZ-VERTEX2 3.5 ", 8192},
	{"ATA     OCZ-VERTEX3     ", 8192},
	{"ATA     OCZ-VERTEX3 LT  ", 8192},
	{"ATA     OCZ-VERTEX3 MI  ", 8192},
	{"ATA     OCZ-VERTEX4     ", 8192},
	{"ATA     SAMSUNG MZ7WD120", 8192},
	{"ATA     SAMSUNG MZ7WD240", 8192},
	{"ATA     SAMSUNG MZ7WD480", 8192},
	{"ATA     SAMSUNG MZ7WD960", 8192},
	{"ATA     SAMSUNG SSD 830 ", 8192},
	{"ATA     Samsung SSD 840 ", 8192},
	{"ATA     SanDisk SSD U100", 8192},
	{"ATA     TOSHIBA THNSNH06", 8192},
	{"ATA     TOSHIBA THNSNH12", 8192},
	{"ATA     TOSHIBA THNSNH25", 8192},
	{"ATA     TOSHIBA THNSNH51", 8192},
	{"ATA     APPLE SSD TS064C", 4096},
	{"ATA     APPLE SSD TS128C", 4096},
	{"ATA     APPLE SSD TS256C", 4096},
	{"ATA     APPLE SSD TS512C", 4096},
	{"ATA     INTEL SSDSA2M040", 4096},
	{"ATA     INTEL SSDSA2M080", 4096},
	{"ATA     INTEL SSDSA2M160", 4096},
	{"ATA     INTEL SSDSC2MH12", 4096},
	{"ATA     INTEL SSDSC2MH25", 4096},
	{"ATA     OCZ CORE_SSD    ", 4096},
	{"ATA     OCZ-VERTEX      ", 4096},
	{"ATA     SAMSUNG MCCOE32G", 4096},
	{"ATA     SAMSUNG MCCOE64G", 4096},
	{"ATA     SAMSUNG SSD PM80", 4096},
	/* Flash drives optimized for 4KB IOs on larger pages */
	{"ATA     INTEL SSDSC2BA10", 4096},
	{"ATA     INTEL SSDSC2BA20", 4096},
	{"ATA     INTEL SSDSC2BA40", 4096},
	{"ATA     INTEL SSDSC2BA80", 4096},
	{"ATA     INTEL SSDSC2BB08", 4096},
	{"ATA     INTEL SSDSC2BB12", 4096},
	{"ATA     INTEL SSDSC2BB16", 4096},
	{"ATA     INTEL SSDSC2BB24", 4096},
	{"ATA     INTEL SSDSC2BB30", 4096},
	{"ATA     INTEL SSDSC2BB40", 4096},
	{"ATA     INTEL SSDSC2BB48", 4096},
	{"ATA     INTEL SSDSC2BB60", 4096},
	{"ATA     INTEL SSDSC2BB80", 4096},
	{"ATA     INTEL SSDSC2BW24", 4096},
	{"ATA     INTEL SSDSC2BW48", 4096},
	{"ATA     INTEL SSDSC2BP24", 4096},
	{"ATA     INTEL SSDSC2BP48", 4096},
	{"NA      SmrtStorSDLKAE9W", 4096},
	{"NVMe    Amazon EC2 NVMe ", 4096},
	/* Imported from Open Solaris */
	{"ATA     MARVELL SD88SA02", 4096},
	/* Advanced format Hard drives */
	{"ATA     Hitachi HDS5C303", 4096},
	{"ATA     SAMSUNG HD204UI ", 4096},
	{"ATA     ST2000DL004 HD20", 4096},
	{"ATA     WDC WD10EARS-00M", 4096},
	{"ATA     WDC WD10EARS-00S", 4096},
	{"ATA     WDC WD10EARS-00Z", 4096},
	{"ATA     WDC WD15EARS-00M", 4096},
	{"ATA     WDC WD15EARS-00S", 4096},
	{"ATA     WDC WD15EARS-00Z", 4096},
	{"ATA     WDC WD20EARS-00M", 4096},
	{"ATA     WDC WD20EARS-00S", 4096},
	{"ATA     WDC WD20EARS-00Z", 4096},
	{"ATA     WDC WD1600BEVT-0", 4096},
	{"ATA     WDC WD2500BEVT-0", 4096},
	{"ATA     WDC WD3200BEVT-0", 4096},
	{"ATA     WDC WD5000BEVT-0", 4096},
};

/* Size of the INQUIRY reply buffer and of the INQUIRY command block. */
#define	INQ_REPLY_LEN	96
#define	INQ_CMD_LEN	6

/* Number of entries in vdev_disk_database. */
static const int vdev_disk_database_size =
	sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);
|
||||
|
||||
boolean_t
|
||||
check_sector_size_database(char *path, int *sector_size)
|
||||
{
|
||||
unsigned char inq_buff[INQ_REPLY_LEN];
|
||||
unsigned char sense_buffer[32];
|
||||
unsigned char inq_cmd_blk[INQ_CMD_LEN] =
|
||||
{INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
|
||||
sg_io_hdr_t io_hdr;
|
||||
int error;
|
||||
int fd;
|
||||
int i;
|
||||
|
||||
/* Prepare INQUIRY command */
|
||||
memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
|
||||
io_hdr.interface_id = 'S';
|
||||
io_hdr.cmd_len = sizeof (inq_cmd_blk);
|
||||
io_hdr.mx_sb_len = sizeof (sense_buffer);
|
||||
io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
|
||||
io_hdr.dxfer_len = INQ_REPLY_LEN;
|
||||
io_hdr.dxferp = inq_buff;
|
||||
io_hdr.cmdp = inq_cmd_blk;
|
||||
io_hdr.sbp = sense_buffer;
|
||||
io_hdr.timeout = 10; /* 10 milliseconds is ample time */
|
||||
|
||||
if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
|
||||
return (B_FALSE);
|
||||
|
||||
error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
|
||||
|
||||
(void) close(fd);
|
||||
|
||||
if (error < 0)
|
||||
return (B_FALSE);
|
||||
|
||||
if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
|
||||
return (B_FALSE);
|
||||
|
||||
for (i = 0; i < vdev_disk_database_size; i++) {
|
||||
if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
|
||||
continue;
|
||||
|
||||
*sector_size = vdev_disk_database[i].sector_size;
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
static int
|
||||
check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
|
||||
{
|
||||
int err;
|
||||
char *value;
|
||||
|
||||
/* No valid type detected device is safe to use */
|
||||
value = blkid_get_tag_value(cache, "TYPE", path);
|
||||
if (value == NULL)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If libblkid detects a ZFS device, we check the device
|
||||
* using check_file() to see if it's safe. The one safe
|
||||
* case is a spare device shared between multiple pools.
|
||||
*/
|
||||
if (strcmp(value, "zfs_member") == 0) {
|
||||
err = check_file(path, force, isspare);
|
||||
} else {
|
||||
if (force) {
|
||||
err = 0;
|
||||
} else {
|
||||
err = -1;
|
||||
vdev_error(gettext("%s contains a filesystem of "
|
||||
"type '%s'\n"), path, value);
|
||||
}
|
||||
}
|
||||
|
||||
free(value);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate that a disk including all partitions are safe to use.
|
||||
*
|
||||
* For EFI labeled disks this can done relatively easily with the libefi
|
||||
* library. The partition numbers are extracted from the label and used
|
||||
* to generate the expected /dev/ paths. Each partition can then be
|
||||
* checked for conflicts.
|
||||
*
|
||||
* For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
|
||||
* but due to the lack of a readily available libraries this scanning is
|
||||
* not implemented. Instead only the device path as given is checked.
|
||||
*/
|
||||
static int
|
||||
check_disk(const char *path, blkid_cache cache, int force,
|
||||
boolean_t isspare, boolean_t iswholedisk)
|
||||
{
|
||||
struct dk_gpt *vtoc;
|
||||
char slice_path[MAXPATHLEN];
|
||||
int err = 0;
|
||||
int fd, i;
|
||||
int flags = O_RDONLY|O_DIRECT;
|
||||
|
||||
if (!iswholedisk)
|
||||
return (check_slice(path, cache, force, isspare));
|
||||
|
||||
/* only spares can be shared, other devices require exclusive access */
|
||||
if (!isspare)
|
||||
flags |= O_EXCL;
|
||||
|
||||
if ((fd = open(path, flags)) < 0) {
|
||||
char *value = blkid_get_tag_value(cache, "TYPE", path);
|
||||
(void) fprintf(stderr, gettext("%s is in use and contains "
|
||||
"a %s filesystem.\n"), path, value ? value : "unknown");
|
||||
free(value);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Expected to fail for non-EFI labeled disks. Just check the device
|
||||
* as given and do not attempt to detect and scan partitions.
|
||||
*/
|
||||
err = efi_alloc_and_read(fd, &vtoc);
|
||||
if (err) {
|
||||
(void) close(fd);
|
||||
return (check_slice(path, cache, force, isspare));
|
||||
}
|
||||
|
||||
/*
|
||||
* The primary efi partition label is damaged however the secondary
|
||||
* label at the end of the device is intact. Rather than use this
|
||||
* label we should play it safe and treat this as a non efi device.
|
||||
*/
|
||||
if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
|
||||
efi_free(vtoc);
|
||||
(void) close(fd);
|
||||
|
||||
if (force) {
|
||||
/* Partitions will now be created using the backup */
|
||||
return (0);
|
||||
} else {
|
||||
vdev_error(gettext("%s contains a corrupt primary "
|
||||
"EFI label.\n"), path);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < vtoc->efi_nparts; i++) {
|
||||
|
||||
if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
|
||||
uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
|
||||
continue;
|
||||
|
||||
if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
|
||||
(void) snprintf(slice_path, sizeof (slice_path),
|
||||
"%s%s%d", path, "-part", i+1);
|
||||
else
|
||||
(void) snprintf(slice_path, sizeof (slice_path),
|
||||
"%s%s%d", path, isdigit(path[strlen(path)-1]) ?
|
||||
"p" : "", i+1);
|
||||
|
||||
err = check_slice(slice_path, cache, force, isspare);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
efi_free(vtoc);
|
||||
(void) close(fd);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
check_device(const char *path, boolean_t force,
|
||||
boolean_t isspare, boolean_t iswholedisk)
|
||||
{
|
||||
blkid_cache cache;
|
||||
int error;
|
||||
|
||||
error = blkid_get_cache(&cache, NULL);
|
||||
if (error != 0) {
|
||||
(void) fprintf(stderr, gettext("unable to access the blkid "
|
||||
"cache.\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
error = check_disk(path, cache, force, isspare, iswholedisk);
|
||||
blkid_put_cache(cache);
|
||||
|
||||
return (error);
|
||||
}
|
@ -4643,7 +4643,7 @@ get_interval_count(int *argcp, char **argv, float *iv,
|
||||
/*
|
||||
* Determine if the last argument is an integer or a pool name
|
||||
*/
|
||||
if (argc > 0 && isnumber(argv[argc - 1])) {
|
||||
if (argc > 0 && zfs_isnumber(argv[argc - 1])) {
|
||||
char *end;
|
||||
|
||||
errno = 0;
|
||||
@ -4673,7 +4673,7 @@ get_interval_count(int *argcp, char **argv, float *iv,
|
||||
* If the last argument is also an integer, then we have both a count
|
||||
* and an interval.
|
||||
*/
|
||||
if (argc > 0 && isnumber(argv[argc - 1])) {
|
||||
if (argc > 0 && zfs_isnumber(argv[argc - 1])) {
|
||||
char *end;
|
||||
|
||||
errno = 0;
|
||||
|
@ -98,20 +98,6 @@ array64_max(uint64_t array[], unsigned int len)
|
||||
return (max);
|
||||
}
|
||||
|
||||
/*
 * Return 1 if "str" consists solely of decimal digits and '.' characters,
 * 0 otherwise.  This accepts plain integer and floating point notation;
 * signs and exponents are rejected.  No further validation is done, so
 * the empty string and strings such as "1.2.3" also yield 1.
 */
int
isnumber(char *str)
{
	for (; *str != '\0'; str++) {
		/*
		 * The <ctype.h> functions are only defined for unsigned
		 * char values (and EOF), so never pass a plain char.
		 */
		if (!isdigit((unsigned char)*str) && *str != '.')
			return (0);
	}

	return (1);
}
|
||||
|
||||
/*
|
||||
* Find highest one bit set.
|
||||
* Returns bit number + 1 of highest bit that is set, otherwise returns 0.
|
||||
|
@ -42,7 +42,6 @@ void *safe_malloc(size_t);
|
||||
void zpool_no_memory(void);
|
||||
uint_t num_logs(nvlist_t *nv);
|
||||
uint64_t array64_max(uint64_t array[], unsigned int len);
|
||||
int isnumber(char *str);
|
||||
int highbit64(uint64_t i);
|
||||
int lowbit64(uint64_t i);
|
||||
|
||||
@ -125,6 +124,12 @@ vdev_cmd_data_list_t *all_pools_for_each_vdev_run(int argc, char **argv,
|
||||
|
||||
void free_vdev_cmd_data_list(vdev_cmd_data_list_t *vcdl);
|
||||
|
||||
int check_device(const char *path, boolean_t force,
|
||||
boolean_t isspare, boolean_t iswholedisk);
|
||||
boolean_t check_sector_size_database(char *path, int *sector_size);
|
||||
void vdev_error(const char *fmt, ...);
|
||||
int check_file(const char *file, boolean_t force, boolean_t isspare);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -72,19 +72,12 @@
|
||||
#include <libzutil.h>
|
||||
#include <limits.h>
|
||||
#include <sys/spa.h>
|
||||
#include <scsi/scsi.h>
|
||||
#include <scsi/sg.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <sys/efi_partition.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/vtoc.h>
|
||||
#include <sys/mntent.h>
|
||||
#include <uuid/uuid.h>
|
||||
#include <blkid/blkid.h>
|
||||
#include "zpool_util.h"
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
/*
|
||||
* For any given vdev specification, we can have multiple errors. The
|
||||
@ -94,191 +87,11 @@
|
||||
boolean_t error_seen;
|
||||
boolean_t is_force;
|
||||
|
||||
/*
 * One entry of the sector size override table: the identification string
 * a device reports and the sector size to assume for that device.
 */
typedef struct vdev_disk_db_entry
{
	char id[24];		/* vendor + product id, no NUL terminator */
	int sector_size;	/* sector size to use for this device */
} vdev_disk_db_entry_t;

/*
 * Database of block devices that lie about physical sector sizes.
 *
 * Each identification string must be precisely 24 characters to avoid
 * false negatives: check_sector_size_database() compares all 24 bytes of
 * id[] with memcmp() against bytes 8-31 of the device's INQUIRY reply.
 * In the standard INQUIRY data format those bytes hold the 8 character
 * vendor identification followed by the 16 character product
 * identification, each left aligned and padded with spaces.  The
 * literals below are therefore space padded to fill id[] completely and
 * intentionally leave no room for a terminating NUL.
 */
static const vdev_disk_db_entry_t vdev_disk_database[] = {
	{"ATA     ADATA SSD S396 3", 8192},
	{"ATA     APPLE SSD SM128E", 8192},
	{"ATA     APPLE SSD SM256E", 8192},
	{"ATA     APPLE SSD SM512E", 8192},
	{"ATA     APPLE SSD SM768E", 8192},
	{"ATA     C400-MTFDDAC064M", 8192},
	{"ATA     C400-MTFDDAC128M", 8192},
	{"ATA     C400-MTFDDAC256M", 8192},
	{"ATA     C400-MTFDDAC512M", 8192},
	{"ATA     Corsair Force 3 ", 8192},
	{"ATA     Corsair Force GS", 8192},
	{"ATA     INTEL SSDSA2CT04", 8192},
	{"ATA     INTEL SSDSA2BZ10", 8192},
	{"ATA     INTEL SSDSA2BZ20", 8192},
	{"ATA     INTEL SSDSA2BZ30", 8192},
	{"ATA     INTEL SSDSA2CW04", 8192},
	{"ATA     INTEL SSDSA2CW08", 8192},
	{"ATA     INTEL SSDSA2CW12", 8192},
	{"ATA     INTEL SSDSA2CW16", 8192},
	{"ATA     INTEL SSDSA2CW30", 8192},
	{"ATA     INTEL SSDSA2CW60", 8192},
	{"ATA     INTEL SSDSC2CT06", 8192},
	{"ATA     INTEL SSDSC2CT12", 8192},
	{"ATA     INTEL SSDSC2CT18", 8192},
	{"ATA     INTEL SSDSC2CT24", 8192},
	{"ATA     INTEL SSDSC2CW06", 8192},
	{"ATA     INTEL SSDSC2CW12", 8192},
	{"ATA     INTEL SSDSC2CW18", 8192},
	{"ATA     INTEL SSDSC2CW24", 8192},
	{"ATA     INTEL SSDSC2CW48", 8192},
	{"ATA     KINGSTON SH100S3", 8192},
	{"ATA     KINGSTON SH103S3", 8192},
	{"ATA     M4-CT064M4SSD2  ", 8192},
	{"ATA     M4-CT128M4SSD2  ", 8192},
	{"ATA     M4-CT256M4SSD2  ", 8192},
	{"ATA     M4-CT512M4SSD2  ", 8192},
	{"ATA     OCZ-AGILITY2    ", 8192},
	{"ATA     OCZ-AGILITY3    ", 8192},
	{"ATA     OCZ-VERTEX2 3.5 ", 8192},
	{"ATA     OCZ-VERTEX3     ", 8192},
	{"ATA     OCZ-VERTEX3 LT  ", 8192},
	{"ATA     OCZ-VERTEX3 MI  ", 8192},
	{"ATA     OCZ-VERTEX4     ", 8192},
	{"ATA     SAMSUNG MZ7WD120", 8192},
	{"ATA     SAMSUNG MZ7WD240", 8192},
	{"ATA     SAMSUNG MZ7WD480", 8192},
	{"ATA     SAMSUNG MZ7WD960", 8192},
	{"ATA     SAMSUNG SSD 830 ", 8192},
	{"ATA     Samsung SSD 840 ", 8192},
	{"ATA     SanDisk SSD U100", 8192},
	{"ATA     TOSHIBA THNSNH06", 8192},
	{"ATA     TOSHIBA THNSNH12", 8192},
	{"ATA     TOSHIBA THNSNH25", 8192},
	{"ATA     TOSHIBA THNSNH51", 8192},
	{"ATA     APPLE SSD TS064C", 4096},
	{"ATA     APPLE SSD TS128C", 4096},
	{"ATA     APPLE SSD TS256C", 4096},
	{"ATA     APPLE SSD TS512C", 4096},
	{"ATA     INTEL SSDSA2M040", 4096},
	{"ATA     INTEL SSDSA2M080", 4096},
	{"ATA     INTEL SSDSA2M160", 4096},
	{"ATA     INTEL SSDSC2MH12", 4096},
	{"ATA     INTEL SSDSC2MH25", 4096},
	{"ATA     OCZ CORE_SSD    ", 4096},
	{"ATA     OCZ-VERTEX      ", 4096},
	{"ATA     SAMSUNG MCCOE32G", 4096},
	{"ATA     SAMSUNG MCCOE64G", 4096},
	{"ATA     SAMSUNG SSD PM80", 4096},
	/* Flash drives optimized for 4KB IOs on larger pages */
	{"ATA     INTEL SSDSC2BA10", 4096},
	{"ATA     INTEL SSDSC2BA20", 4096},
	{"ATA     INTEL SSDSC2BA40", 4096},
	{"ATA     INTEL SSDSC2BA80", 4096},
	{"ATA     INTEL SSDSC2BB08", 4096},
	{"ATA     INTEL SSDSC2BB12", 4096},
	{"ATA     INTEL SSDSC2BB16", 4096},
	{"ATA     INTEL SSDSC2BB24", 4096},
	{"ATA     INTEL SSDSC2BB30", 4096},
	{"ATA     INTEL SSDSC2BB40", 4096},
	{"ATA     INTEL SSDSC2BB48", 4096},
	{"ATA     INTEL SSDSC2BB60", 4096},
	{"ATA     INTEL SSDSC2BB80", 4096},
	{"ATA     INTEL SSDSC2BW24", 4096},
	{"ATA     INTEL SSDSC2BW48", 4096},
	{"ATA     INTEL SSDSC2BP24", 4096},
	{"ATA     INTEL SSDSC2BP48", 4096},
	{"NA      SmrtStorSDLKAE9W", 4096},
	{"NVMe    Amazon EC2 NVMe ", 4096},
	/* Imported from Open Solaris */
	{"ATA     MARVELL SD88SA02", 4096},
	/* Advanced format Hard drives */
	{"ATA     Hitachi HDS5C303", 4096},
	{"ATA     SAMSUNG HD204UI ", 4096},
	{"ATA     ST2000DL004 HD20", 4096},
	{"ATA     WDC WD10EARS-00M", 4096},
	{"ATA     WDC WD10EARS-00S", 4096},
	{"ATA     WDC WD10EARS-00Z", 4096},
	{"ATA     WDC WD15EARS-00M", 4096},
	{"ATA     WDC WD15EARS-00S", 4096},
	{"ATA     WDC WD15EARS-00Z", 4096},
	{"ATA     WDC WD20EARS-00M", 4096},
	{"ATA     WDC WD20EARS-00S", 4096},
	{"ATA     WDC WD20EARS-00Z", 4096},
	{"ATA     WDC WD1600BEVT-0", 4096},
	{"ATA     WDC WD2500BEVT-0", 4096},
	{"ATA     WDC WD3200BEVT-0", 4096},
	{"ATA     WDC WD5000BEVT-0", 4096},
	/* Virtual disks: Assume zvols with default volblocksize */
#if 0
	{"ATA     QEMU HARDDISK   ", 8192},
	{"IET     VIRTUAL-DISK    ", 8192},
	{"OI      COMSTAR         ", 8192},
	{"SUN     COMSTAR         ", 8192},
	{"NETAPP  LUN             ", 8192},
#endif
};

/* Number of entries in vdev_disk_database. */
static const int vdev_disk_database_size =
	sizeof (vdev_disk_database) / sizeof (vdev_disk_database[0]);

/* Size of the INQUIRY reply buffer and of the INQUIRY command block. */
#define	INQ_REPLY_LEN	96
#define	INQ_CMD_LEN	6
|
||||
|
||||
static boolean_t
|
||||
check_sector_size_database(char *path, int *sector_size)
|
||||
{
|
||||
unsigned char inq_buff[INQ_REPLY_LEN];
|
||||
unsigned char sense_buffer[32];
|
||||
unsigned char inq_cmd_blk[INQ_CMD_LEN] =
|
||||
{INQUIRY, 0, 0, 0, INQ_REPLY_LEN, 0};
|
||||
sg_io_hdr_t io_hdr;
|
||||
int error;
|
||||
int fd;
|
||||
int i;
|
||||
|
||||
/* Prepare INQUIRY command */
|
||||
memset(&io_hdr, 0, sizeof (sg_io_hdr_t));
|
||||
io_hdr.interface_id = 'S';
|
||||
io_hdr.cmd_len = sizeof (inq_cmd_blk);
|
||||
io_hdr.mx_sb_len = sizeof (sense_buffer);
|
||||
io_hdr.dxfer_direction = SG_DXFER_FROM_DEV;
|
||||
io_hdr.dxfer_len = INQ_REPLY_LEN;
|
||||
io_hdr.dxferp = inq_buff;
|
||||
io_hdr.cmdp = inq_cmd_blk;
|
||||
io_hdr.sbp = sense_buffer;
|
||||
io_hdr.timeout = 10; /* 10 milliseconds is ample time */
|
||||
|
||||
if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
|
||||
return (B_FALSE);
|
||||
|
||||
error = ioctl(fd, SG_IO, (unsigned long) &io_hdr);
|
||||
|
||||
(void) close(fd);
|
||||
|
||||
if (error < 0)
|
||||
return (B_FALSE);
|
||||
|
||||
if ((io_hdr.info & SG_INFO_OK_MASK) != SG_INFO_OK)
|
||||
return (B_FALSE);
|
||||
|
||||
for (i = 0; i < vdev_disk_database_size; i++) {
|
||||
if (memcmp(inq_buff + 8, vdev_disk_database[i].id, 24))
|
||||
continue;
|
||||
|
||||
*sector_size = vdev_disk_database[i].sector_size;
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*PRINTFLIKE1*/
|
||||
static void
|
||||
void
|
||||
vdev_error(const char *fmt, ...)
|
||||
{
|
||||
va_list ap;
|
||||
@ -303,7 +116,7 @@ vdev_error(const char *fmt, ...)
|
||||
* Check that a file is valid. All we can do in this case is check that it's
|
||||
* not in use by another pool, and not in use by swap.
|
||||
*/
|
||||
static int
|
||||
int
|
||||
check_file(const char *file, boolean_t force, boolean_t isspare)
|
||||
{
|
||||
char *name;
|
||||
@ -367,150 +180,6 @@ check_file(const char *file, boolean_t force, boolean_t isspare)
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
check_slice(const char *path, blkid_cache cache, int force, boolean_t isspare)
|
||||
{
|
||||
int err;
|
||||
char *value;
|
||||
|
||||
/* No valid type detected device is safe to use */
|
||||
value = blkid_get_tag_value(cache, "TYPE", path);
|
||||
if (value == NULL)
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If libblkid detects a ZFS device, we check the device
|
||||
* using check_file() to see if it's safe. The one safe
|
||||
* case is a spare device shared between multiple pools.
|
||||
*/
|
||||
if (strcmp(value, "zfs_member") == 0) {
|
||||
err = check_file(path, force, isspare);
|
||||
} else {
|
||||
if (force) {
|
||||
err = 0;
|
||||
} else {
|
||||
err = -1;
|
||||
vdev_error(gettext("%s contains a filesystem of "
|
||||
"type '%s'\n"), path, value);
|
||||
}
|
||||
}
|
||||
|
||||
free(value);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate that a disk including all partitions are safe to use.
|
||||
*
|
||||
* For EFI labeled disks this can done relatively easily with the libefi
|
||||
* library. The partition numbers are extracted from the label and used
|
||||
* to generate the expected /dev/ paths. Each partition can then be
|
||||
* checked for conflicts.
|
||||
*
|
||||
* For non-EFI labeled disks (MBR/EBR/etc) the same process is possible
|
||||
* but due to the lack of a readily available libraries this scanning is
|
||||
* not implemented. Instead only the device path as given is checked.
|
||||
*/
|
||||
static int
|
||||
check_disk(const char *path, blkid_cache cache, int force,
|
||||
boolean_t isspare, boolean_t iswholedisk)
|
||||
{
|
||||
struct dk_gpt *vtoc;
|
||||
char slice_path[MAXPATHLEN];
|
||||
int err = 0;
|
||||
int fd, i;
|
||||
int flags = O_RDONLY|O_DIRECT;
|
||||
|
||||
if (!iswholedisk)
|
||||
return (check_slice(path, cache, force, isspare));
|
||||
|
||||
/* only spares can be shared, other devices require exclusive access */
|
||||
if (!isspare)
|
||||
flags |= O_EXCL;
|
||||
|
||||
if ((fd = open(path, flags)) < 0) {
|
||||
char *value = blkid_get_tag_value(cache, "TYPE", path);
|
||||
(void) fprintf(stderr, gettext("%s is in use and contains "
|
||||
"a %s filesystem.\n"), path, value ? value : "unknown");
|
||||
free(value);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Expected to fail for non-EFI labeled disks. Just check the device
|
||||
* as given and do not attempt to detect and scan partitions.
|
||||
*/
|
||||
err = efi_alloc_and_read(fd, &vtoc);
|
||||
if (err) {
|
||||
(void) close(fd);
|
||||
return (check_slice(path, cache, force, isspare));
|
||||
}
|
||||
|
||||
/*
|
||||
* The primary efi partition label is damaged however the secondary
|
||||
* label at the end of the device is intact. Rather than use this
|
||||
* label we should play it safe and treat this as a non efi device.
|
||||
*/
|
||||
if (vtoc->efi_flags & EFI_GPT_PRIMARY_CORRUPT) {
|
||||
efi_free(vtoc);
|
||||
(void) close(fd);
|
||||
|
||||
if (force) {
|
||||
/* Partitions will now be created using the backup */
|
||||
return (0);
|
||||
} else {
|
||||
vdev_error(gettext("%s contains a corrupt primary "
|
||||
"EFI label.\n"), path);
|
||||
return (-1);
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < vtoc->efi_nparts; i++) {
|
||||
|
||||
if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED ||
|
||||
uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid))
|
||||
continue;
|
||||
|
||||
if (strncmp(path, UDISK_ROOT, strlen(UDISK_ROOT)) == 0)
|
||||
(void) snprintf(slice_path, sizeof (slice_path),
|
||||
"%s%s%d", path, "-part", i+1);
|
||||
else
|
||||
(void) snprintf(slice_path, sizeof (slice_path),
|
||||
"%s%s%d", path, isdigit(path[strlen(path)-1]) ?
|
||||
"p" : "", i+1);
|
||||
|
||||
err = check_slice(slice_path, cache, force, isspare);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
|
||||
efi_free(vtoc);
|
||||
(void) close(fd);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
check_device(const char *path, boolean_t force,
|
||||
boolean_t isspare, boolean_t iswholedisk)
|
||||
{
|
||||
blkid_cache cache;
|
||||
int error;
|
||||
|
||||
error = blkid_get_cache(&cache, NULL);
|
||||
if (error != 0) {
|
||||
(void) fprintf(stderr, gettext("unable to access the blkid "
|
||||
"cache.\n"));
|
||||
return (-1);
|
||||
}
|
||||
|
||||
error = check_disk(path, cache, force, isspare, iswholedisk);
|
||||
blkid_put_cache(cache);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* This may be a shorthand device path or it could be total gibberish.
|
||||
* Check to see if it is a known device available in zfs_vdev_paths.
|
||||
|
@ -117,6 +117,8 @@ extern boolean_t is_mpath_whole_disk(const char *);
|
||||
#define is_mpath_whole_disk(path) (B_FALSE)
|
||||
#endif
|
||||
|
||||
extern boolean_t zfs_isnumber(const char *);
|
||||
|
||||
/*
|
||||
* Formats for iostat numbers. Examples: "12K", "30ms", "4B", "2321234", "-".
|
||||
*
|
||||
|
@ -23,10 +23,25 @@
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <libzutil.h>
|
||||
|
||||
/*
|
||||
* Return B_TRUE if "str" is a number string, B_FALSE otherwise.
|
||||
* Works for integer and floating point numbers.
|
||||
*/
|
||||
boolean_t
|
||||
zfs_isnumber(const char *str)
|
||||
{
|
||||
for (; *str; str++)
|
||||
if (!(isdigit(*str) || (*str == '.')))
|
||||
return (B_FALSE);
|
||||
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a number to an appropriately human-readable output.
|
||||
*/
|
||||
|
Loading…
x
Reference in New Issue
Block a user