2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* CDDL HEADER START
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the terms of the
|
|
|
|
* Common Development and Distribution License (the "License").
|
|
|
|
* You may not use this file except in compliance with the License.
|
|
|
|
*
|
|
|
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
|
|
|
* or http://www.opensolaris.org/os/licensing.
|
|
|
|
* See the License for the specific language governing permissions
|
|
|
|
* and limitations under the License.
|
|
|
|
*
|
|
|
|
* When distributing Covered Code, include this CDDL HEADER in each
|
|
|
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
|
|
|
* If applicable, add the following below this CDDL HEADER, with the
|
|
|
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
|
|
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
|
|
|
*
|
|
|
|
* CDDL HEADER END
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
2010-05-28 13:45:14 -07:00
|
|
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
2018-09-05 19:33:36 -06:00
|
|
|
* Copyright (c) 2013, 2018 by Delphix. All rights reserved.
|
|
|
|
* Copyright (c) 2016, 2017 Intel Corporation.
|
2017-01-17 14:42:56 -08:00
|
|
|
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
|
|
|
|
|
|
|
/*
 * Functions to convert between a list of vdevs and an nvlist representing the
 * configuration.  Each entry in the list can be one of:
 *
 *	Device vdevs
 *		disk=(path=..., devid=...)
 *		file=(path=...)
 *
 *	Group vdevs
 *		raidz[1|2]=(...)
 *		mirror=(...)
 *
 *	Hot spares
 *
 * While the underlying implementation supports it, group vdevs cannot contain
 * other group vdevs.  All userland verification of devices is contained within
 * this file.  If successful, the nvlist returned can be passed directly to the
 * kernel; we've done as much verification as possible in userland.
 *
 * Hot spares are a special case, and are passed down as an array of disk
 * vdevs, at the same level as the root of the vdev tree.
 *
 * The main function exported by this file is 'make_root_vdev'.  The
 * function performs several passes:
 *
 *	1. Construct the vdev specification.  Performs syntax validation and
 *	   makes sure each device is valid.
 *	2. Check for devices in use.  Uses libblkid to make sure that none of
 *	   the devices are already in use.  Some conflicts can be overridden
 *	   using the 'force' flag, others cannot.
 *	3. Check for replication errors if the 'force' flag is not specified.
 *	   Validates that the replication level is consistent across the
 *	   entire pool.
 *	4. Call libzfs to label any whole disks with an EFI label.
 */
|
|
|
|
|
|
|
|
#include <assert.h>
|
2010-08-26 11:56:53 -07:00
|
|
|
#include <ctype.h>
|
2008-11-20 12:01:55 -08:00
|
|
|
#include <errno.h>
|
|
|
|
#include <fcntl.h>
|
|
|
|
#include <libintl.h>
|
|
|
|
#include <libnvpair.h>
|
2018-11-05 12:22:33 -07:00
|
|
|
#include <libzutil.h>
|
2009-08-18 11:43:27 -07:00
|
|
|
#include <limits.h>
|
2017-03-29 02:21:11 +02:00
|
|
|
#include <sys/spa.h>
|
2008-11-20 12:01:55 -08:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include "zpool_util.h"
|
2013-08-10 08:24:40 -04:00
|
|
|
#include <sys/zfs_context.h>
|
2019-09-30 12:16:06 -07:00
|
|
|
#include <sys/stat.h>
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* For any given vdev specification, we can have multiple errors. The
|
|
|
|
* vdev_error() function keeps track of whether we have seen an error yet, and
|
|
|
|
* prints out a header if its the first error we've seen.
|
|
|
|
*/
|
|
|
|
boolean_t error_seen;
|
|
|
|
boolean_t is_force;
|
|
|
|
|
|
|
|
/*PRINTFLIKE1*/
|
2019-09-30 12:16:06 -07:00
|
|
|
void
|
2008-11-20 12:01:55 -08:00
|
|
|
vdev_error(const char *fmt, ...)
|
|
|
|
{
|
|
|
|
va_list ap;
|
|
|
|
|
|
|
|
if (!error_seen) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid vdev specification\n"));
|
|
|
|
if (!is_force)
|
|
|
|
(void) fprintf(stderr, gettext("use '-f' to override "
|
|
|
|
"the following errors:\n"));
|
|
|
|
else
|
|
|
|
(void) fprintf(stderr, gettext("the following errors "
|
|
|
|
"must be manually repaired:\n"));
|
|
|
|
error_seen = B_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
va_start(ap, fmt);
|
|
|
|
(void) vfprintf(stderr, fmt, ap);
|
|
|
|
va_end(ap);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check that a file is valid. All we can do in this case is check that it's
|
|
|
|
* not in use by another pool, and not in use by swap.
|
|
|
|
*/
|
2019-09-30 12:16:06 -07:00
|
|
|
int
|
2008-11-20 12:01:55 -08:00
|
|
|
check_file(const char *file, boolean_t force, boolean_t isspare)
|
|
|
|
{
|
|
|
|
char *name;
|
|
|
|
int fd;
|
|
|
|
int ret = 0;
|
|
|
|
pool_state_t state;
|
|
|
|
boolean_t inuse;
|
|
|
|
|
|
|
|
if ((fd = open(file, O_RDONLY)) < 0)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) == 0 && inuse) {
|
|
|
|
const char *desc;
|
|
|
|
|
|
|
|
switch (state) {
|
|
|
|
case POOL_STATE_ACTIVE:
|
|
|
|
desc = gettext("active");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case POOL_STATE_EXPORTED:
|
|
|
|
desc = gettext("exported");
|
|
|
|
break;
|
|
|
|
|
|
|
|
case POOL_STATE_POTENTIALLY_ACTIVE:
|
|
|
|
desc = gettext("potentially active");
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
desc = gettext("unknown");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Allow hot spares to be shared between pools.
|
|
|
|
*/
|
2016-09-11 04:41:19 +08:00
|
|
|
if (state == POOL_STATE_SPARE && isspare) {
|
|
|
|
free(name);
|
|
|
|
(void) close(fd);
|
2008-11-20 12:01:55 -08:00
|
|
|
return (0);
|
2016-09-11 04:41:19 +08:00
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
if (state == POOL_STATE_ACTIVE ||
|
|
|
|
state == POOL_STATE_SPARE || !force) {
|
|
|
|
switch (state) {
|
|
|
|
case POOL_STATE_SPARE:
|
|
|
|
vdev_error(gettext("%s is reserved as a hot "
|
|
|
|
"spare for pool %s\n"), file, name);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
vdev_error(gettext("%s is part of %s pool "
|
|
|
|
"'%s'\n"), file, desc, name);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
free(name);
|
|
|
|
}
|
|
|
|
|
|
|
|
(void) close(fd);
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2010-08-26 11:56:53 -07:00
|
|
|
/*
|
|
|
|
* This may be a shorthand device path or it could be total gibberish.
|
2012-10-17 16:58:54 -07:00
|
|
|
* Check to see if it is a known device available in zfs_vdev_paths.
|
|
|
|
* As part of this check, see if we've been given an entire disk
|
|
|
|
* (minus the slice number).
|
2010-08-26 11:56:53 -07:00
|
|
|
*/
|
|
|
|
static int
|
2016-10-11 06:30:22 +08:00
|
|
|
is_shorthand_path(const char *arg, char *path, size_t path_size,
|
2013-11-01 20:26:11 +01:00
|
|
|
struct stat64 *statbuf, boolean_t *wholedisk)
|
2010-08-26 11:56:53 -07:00
|
|
|
{
|
2012-10-17 16:58:54 -07:00
|
|
|
int error;
|
|
|
|
|
2016-10-11 06:30:22 +08:00
|
|
|
error = zfs_resolve_shortname(arg, path, path_size);
|
2012-10-17 16:58:54 -07:00
|
|
|
if (error == 0) {
|
2017-01-13 09:25:15 -08:00
|
|
|
*wholedisk = zfs_dev_is_whole_disk(path);
|
Add helper functions for manipulating device names
This change adds two helper functions for working with vdev names and paths.
zfs_resolve_shortname() resolves a shorthand vdev name to an absolute path
of a file in /dev, /dev/disk/by-id, /dev/disk/by-label, /dev/disk/by-path,
/dev/disk/by-uuid, /dev/disk/zpool. This was previously done only in the
function is_shorthand_path(), but we need a general helper function to
implement shorthand names for additional zpool subcommands like remove.
is_shorthand_path() is accordingly updated to call the helper function.
There is a minor change in the way zfs_resolve_shortname() tests if a file
exists. is_shorthand_path() effectively used open() and stat64() to test for
file existence, since its scope includes testing if a device is a whole disk
and collecting file status information. zfs_resolve_shortname(), on the other
hand, only uses access() to test for existence and leaves it to the caller to
perform any additional file operations. This seemed like the most general and
lightweight approach, and still preserves the semantics of is_shorthand_path().
zfs_append_partition() appends a partition suffix to a device path. This
should be used to generate the name of a whole disk as it is stored in the vdev
label. The user-visible names of whole disks do not contain the partition
information, while the name in the vdev label does. The code was lifted from
the function make_disks(), which now just calls the helper function. Again,
having a helper function to do this supports general handling of shorthand
names in the user interface.
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
2010-10-13 16:16:25 -07:00
|
|
|
if (*wholedisk || (stat64(path, statbuf) == 0))
|
2010-08-26 11:56:53 -07:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2016-10-11 06:30:22 +08:00
|
|
|
strlcpy(path, arg, path_size);
|
2013-11-01 20:26:11 +01:00
|
|
|
memset(statbuf, 0, sizeof (*statbuf));
|
2010-08-26 11:56:53 -07:00
|
|
|
*wholedisk = B_FALSE;
|
|
|
|
|
2012-10-17 16:58:54 -07:00
|
|
|
return (error);
|
2010-08-26 11:56:53 -07:00
|
|
|
}
|
|
|
|
|
2013-02-26 17:02:27 -08:00
|
|
|
/*
|
|
|
|
* Determine if the given path is a hot spare within the given configuration.
|
|
|
|
* If no configuration is given we rely solely on the label.
|
|
|
|
*/
|
|
|
|
static boolean_t
|
|
|
|
is_spare(nvlist_t *config, const char *path)
|
|
|
|
{
|
|
|
|
int fd;
|
|
|
|
pool_state_t state;
|
|
|
|
char *name = NULL;
|
|
|
|
nvlist_t *label;
|
|
|
|
uint64_t guid, spareguid;
|
|
|
|
nvlist_t *nvroot;
|
|
|
|
nvlist_t **spares;
|
|
|
|
uint_t i, nspares;
|
|
|
|
boolean_t inuse;
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if (zpool_is_draid_spare(path))
|
|
|
|
return (B_TRUE);
|
|
|
|
|
2018-10-17 20:21:07 +02:00
|
|
|
if ((fd = open(path, O_RDONLY|O_DIRECT)) < 0)
|
2013-02-26 17:02:27 -08:00
|
|
|
return (B_FALSE);
|
|
|
|
|
|
|
|
if (zpool_in_use(g_zfs, fd, &state, &name, &inuse) != 0 ||
|
|
|
|
!inuse ||
|
|
|
|
state != POOL_STATE_SPARE ||
|
2015-03-20 15:10:24 -07:00
|
|
|
zpool_read_label(fd, &label, NULL) != 0) {
|
2013-02-26 17:02:27 -08:00
|
|
|
free(name);
|
|
|
|
(void) close(fd);
|
|
|
|
return (B_FALSE);
|
|
|
|
}
|
|
|
|
free(name);
|
|
|
|
(void) close(fd);
|
|
|
|
|
2016-09-11 04:41:19 +08:00
|
|
|
if (config == NULL) {
|
|
|
|
nvlist_free(label);
|
2013-02-26 17:02:27 -08:00
|
|
|
return (B_TRUE);
|
2016-09-11 04:41:19 +08:00
|
|
|
}
|
2013-02-26 17:02:27 -08:00
|
|
|
|
|
|
|
verify(nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) == 0);
|
|
|
|
nvlist_free(label);
|
|
|
|
|
|
|
|
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
|
|
|
&nvroot) == 0);
|
|
|
|
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
|
|
|
|
&spares, &nspares) == 0) {
|
|
|
|
for (i = 0; i < nspares; i++) {
|
|
|
|
verify(nvlist_lookup_uint64(spares[i],
|
|
|
|
ZPOOL_CONFIG_GUID, &spareguid) == 0);
|
|
|
|
if (spareguid == guid)
|
|
|
|
return (B_TRUE);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (B_FALSE);
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* Create a leaf vdev. Determine if this is a file or a device. If it's a
|
|
|
|
* device, fill in the device id to make a complete nvlist. Valid forms for a
|
|
|
|
* leaf vdev are:
|
|
|
|
*
|
2012-10-17 16:58:54 -07:00
|
|
|
* /dev/xxx Complete disk path
|
|
|
|
* /xxx Full path to file
|
|
|
|
* xxx Shorthand for <zfs_vdev_paths>/xxx
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
* draid* Virtual dRAID spare
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
|
|
|
static nvlist_t *
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
make_leaf_vdev(nvlist_t *props, const char *arg, boolean_t is_primary)
|
2008-11-20 12:01:55 -08:00
|
|
|
{
|
|
|
|
char path[MAXPATHLEN];
|
|
|
|
struct stat64 statbuf;
|
|
|
|
nvlist_t *vdev = NULL;
|
|
|
|
char *type = NULL;
|
|
|
|
boolean_t wholedisk = B_FALSE;
|
2013-08-10 08:24:40 -04:00
|
|
|
uint64_t ashift = 0;
|
2010-08-26 11:56:53 -07:00
|
|
|
int err;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Determine what type of vdev this is, and put the full path into
|
|
|
|
* 'path'. We detect whether this is a device of file afterwards by
|
|
|
|
* checking the st_mode of the file.
|
|
|
|
*/
|
|
|
|
if (arg[0] == '/') {
|
|
|
|
/*
|
|
|
|
* Complete device or file path. Exact type is determined by
|
2010-08-26 11:56:53 -07:00
|
|
|
* examining the file descriptor afterwards. Symbolic links
|
2017-01-13 09:25:15 -08:00
|
|
|
* are resolved to their real paths to determine whole disk
|
2010-08-26 11:56:53 -07:00
|
|
|
* and S_ISBLK/S_ISREG type checks. However, we are careful
|
|
|
|
* to store the given path as ZPOOL_CONFIG_PATH to ensure we
|
|
|
|
* can leverage udev's persistent device labels.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
2010-08-26 11:56:53 -07:00
|
|
|
if (realpath(arg, path) == NULL) {
|
2008-11-20 12:01:55 -08:00
|
|
|
(void) fprintf(stderr,
|
2010-08-26 11:56:53 -07:00
|
|
|
gettext("cannot resolve path '%s'\n"), arg);
|
2008-11-20 12:01:55 -08:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2017-01-13 09:25:15 -08:00
|
|
|
wholedisk = zfs_dev_is_whole_disk(path);
|
2008-11-20 12:01:55 -08:00
|
|
|
if (!wholedisk && (stat64(path, &statbuf) != 0)) {
|
2010-08-26 11:56:53 -07:00
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("cannot open '%s': %s\n"),
|
|
|
|
path, strerror(errno));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2017-01-13 09:25:15 -08:00
|
|
|
/* After whole disk check restore original passed path */
|
2016-10-11 06:30:22 +08:00
|
|
|
strlcpy(path, arg, sizeof (path));
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
} else if (zpool_is_draid_spare(arg)) {
|
|
|
|
if (!is_primary) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("cannot open '%s': dRAID spares can only "
|
|
|
|
"be used to replace primary vdevs\n"), arg);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
wholedisk = B_TRUE;
|
|
|
|
strlcpy(path, arg, sizeof (path));
|
|
|
|
type = VDEV_TYPE_DRAID_SPARE;
|
2010-08-26 11:56:53 -07:00
|
|
|
} else {
|
2016-10-11 06:30:22 +08:00
|
|
|
err = is_shorthand_path(arg, path, sizeof (path),
|
|
|
|
&statbuf, &wholedisk);
|
2010-08-26 11:56:53 -07:00
|
|
|
if (err != 0) {
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* If we got ENOENT, then the user gave us
|
|
|
|
* gibberish, so try to direct them with a
|
|
|
|
* reasonable error message. Otherwise,
|
|
|
|
* regurgitate strerror() since it's the best we
|
|
|
|
* can do.
|
|
|
|
*/
|
2010-08-26 11:56:53 -07:00
|
|
|
if (err == ENOENT) {
|
2008-11-20 12:01:55 -08:00
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("cannot open '%s': no such "
|
|
|
|
"device in %s\n"), arg, DISK_ROOT);
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("must be a full path or "
|
|
|
|
"shorthand device name\n"));
|
|
|
|
return (NULL);
|
|
|
|
} else {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("cannot open '%s': %s\n"),
|
|
|
|
path, strerror(errno));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if (type == NULL) {
|
|
|
|
/*
|
|
|
|
* Determine whether this is a device or a file.
|
|
|
|
*/
|
|
|
|
if (wholedisk || S_ISBLK(statbuf.st_mode)) {
|
|
|
|
type = VDEV_TYPE_DISK;
|
|
|
|
} else if (S_ISREG(statbuf.st_mode)) {
|
|
|
|
type = VDEV_TYPE_FILE;
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, gettext("cannot use '%s': must "
|
|
|
|
"be a block device or regular file\n"), path);
|
|
|
|
return (NULL);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Finally, we have the complete device or file, and we know that it is
|
|
|
|
* acceptable to use. Construct the nvlist to describe this vdev. All
|
|
|
|
* vdevs have a 'path' element, and devices also have a 'devid' element.
|
|
|
|
*/
|
|
|
|
verify(nvlist_alloc(&vdev, NV_UNIQUE_NAME, 0) == 0);
|
|
|
|
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_PATH, path) == 0);
|
|
|
|
verify(nvlist_add_string(vdev, ZPOOL_CONFIG_TYPE, type) == 0);
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
if (strcmp(type, VDEV_TYPE_DISK) == 0)
|
|
|
|
verify(nvlist_add_uint64(vdev, ZPOOL_CONFIG_WHOLE_DISK,
|
|
|
|
(uint64_t)wholedisk) == 0);
|
|
|
|
|
2013-08-10 08:24:40 -04:00
|
|
|
/*
|
|
|
|
* Override defaults if custom properties are provided.
|
|
|
|
*/
|
2011-06-16 21:56:38 +02:00
|
|
|
if (props != NULL) {
|
|
|
|
char *value = NULL;
|
|
|
|
|
|
|
|
if (nvlist_lookup_string(props,
|
2017-03-29 02:21:11 +02:00
|
|
|
zpool_prop_to_name(ZPOOL_PROP_ASHIFT), &value) == 0) {
|
2017-05-03 18:31:05 +02:00
|
|
|
if (zfs_nicestrtonum(NULL, value, &ashift) != 0) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("ashift must be a number.\n"));
|
|
|
|
return (NULL);
|
|
|
|
}
|
2017-03-29 02:21:11 +02:00
|
|
|
if (ashift != 0 &&
|
|
|
|
(ashift < ASHIFT_MIN || ashift > ASHIFT_MAX)) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("invalid 'ashift=%" PRIu64 "' "
|
|
|
|
"property: only values between %" PRId32 " "
|
|
|
|
"and %" PRId32 " are allowed.\n"),
|
|
|
|
ashift, ASHIFT_MIN, ASHIFT_MAX);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
2013-08-10 08:24:40 -04:00
|
|
|
}
|
2011-06-16 21:56:38 +02:00
|
|
|
|
2013-08-10 08:24:40 -04:00
|
|
|
/*
|
|
|
|
* If the device is known to incorrectly report its physical sector
|
|
|
|
* size explicitly provide the known correct value.
|
|
|
|
*/
|
|
|
|
if (ashift == 0) {
|
|
|
|
int sector_size;
|
|
|
|
|
|
|
|
if (check_sector_size_database(path, &sector_size) == B_TRUE)
|
2014-04-15 19:40:22 -08:00
|
|
|
ashift = highbit64(sector_size) - 1;
|
2011-06-16 21:56:38 +02:00
|
|
|
}
|
|
|
|
|
2013-08-10 08:24:40 -04:00
|
|
|
if (ashift > 0)
|
2016-10-03 02:24:54 +08:00
|
|
|
(void) nvlist_add_uint64(vdev, ZPOOL_CONFIG_ASHIFT, ashift);
|
2013-08-10 08:24:40 -04:00
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
return (vdev);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through and verify the replication level of the pool is consistent.
|
|
|
|
* Performs the following checks:
|
|
|
|
*
|
|
|
|
* For the new spec, verifies that devices in mirrors and raidz are the
|
|
|
|
* same size.
|
|
|
|
*
|
|
|
|
* If the current configuration already has inconsistent replication
|
|
|
|
* levels, ignore any other potential problems in the new spec.
|
|
|
|
*
|
|
|
|
* Otherwise, make sure that the current spec (if there is one) and the new
|
|
|
|
* spec have consistent replication levels.
|
2018-09-05 19:33:36 -06:00
|
|
|
*
|
|
|
|
* If there is no current spec (create), make sure new spec has at least
|
|
|
|
* one general purpose vdev.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
|
|
|
/*
 * Replication summary for a single toplevel vdev, as filled in by
 * get_replication().
 */
struct replication_level {
	/* vdev type string read from the vdev's ZPOOL_CONFIG_TYPE entry */
	char *zprl_type;
	/* child count; 1 for a vdev with no child array (file or disk) */
	uint64_t zprl_children;
	/* ZPOOL_CONFIG_NPARITY value for raidz/dRAID vdevs, otherwise 0 */
	uint64_t zprl_parity;
};

typedef struct replication_level replication_level_t;
|
|
|
|
|
|
|
|
/*
 * Largest difference in size (16MB, in bytes) tolerated between the child
 * devices of a single mirror/raidz vdev. get_replication() reports
 * "contains devices of different sizes" when two children differ by more
 * than this amount.
 */
#define ZPOOL_FUZZ (16 * 1024 * 1024)
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/*
|
|
|
|
* N.B. For the purposes of comparing replication levels dRAID can be
|
2021-04-02 18:38:53 -07:00
|
|
|
* considered functionally equivalent to raidz.
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
*/
|
2017-04-06 00:21:13 +02:00
|
|
|
static boolean_t
|
|
|
|
is_raidz_mirror(replication_level_t *a, replication_level_t *b,
|
|
|
|
replication_level_t **raidz, replication_level_t **mirror)
|
|
|
|
{
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if ((strcmp(a->zprl_type, "raidz") == 0 ||
|
|
|
|
strcmp(a->zprl_type, "draid") == 0) &&
|
2017-04-06 00:21:13 +02:00
|
|
|
strcmp(b->zprl_type, "mirror") == 0) {
|
|
|
|
*raidz = a;
|
|
|
|
*mirror = b;
|
|
|
|
return (B_TRUE);
|
|
|
|
}
|
|
|
|
return (B_FALSE);
|
|
|
|
}
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/*
|
|
|
|
* Comparison for determining if dRAID and raidz where passed in either order.
|
|
|
|
*/
|
|
|
|
static boolean_t
|
|
|
|
is_raidz_draid(replication_level_t *a, replication_level_t *b)
|
|
|
|
{
|
|
|
|
if ((strcmp(a->zprl_type, "raidz") == 0 ||
|
|
|
|
strcmp(a->zprl_type, "draid") == 0) &&
|
|
|
|
(strcmp(b->zprl_type, "raidz") == 0 ||
|
|
|
|
strcmp(b->zprl_type, "draid") == 0)) {
|
|
|
|
return (B_TRUE);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (B_FALSE);
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* Given a list of toplevel vdevs, return the current replication level. If
|
|
|
|
* the config is inconsistent, then NULL is returned. If 'fatal' is set, then
|
|
|
|
* an error message will be displayed for each self-inconsistent vdev.
|
|
|
|
*/
|
|
|
|
static replication_level_t *
|
|
|
|
get_replication(nvlist_t *nvroot, boolean_t fatal)
|
|
|
|
{
|
|
|
|
nvlist_t **top;
|
|
|
|
uint_t t, toplevels;
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t c, children;
|
|
|
|
nvlist_t *nv;
|
|
|
|
char *type;
|
2017-01-17 14:42:56 -08:00
|
|
|
replication_level_t lastrep = {0};
|
|
|
|
replication_level_t rep;
|
|
|
|
replication_level_t *ret;
|
2017-04-06 00:21:13 +02:00
|
|
|
replication_level_t *raidz, *mirror;
|
2008-11-20 12:01:55 -08:00
|
|
|
boolean_t dontreport;
|
|
|
|
|
|
|
|
ret = safe_malloc(sizeof (replication_level_t));
|
|
|
|
|
|
|
|
verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&top, &toplevels) == 0);
|
|
|
|
|
|
|
|
for (t = 0; t < toplevels; t++) {
|
|
|
|
uint64_t is_log = B_FALSE;
|
|
|
|
|
|
|
|
nv = top[t];
|
|
|
|
|
|
|
|
/*
|
|
|
|
* For separate logs we ignore the top level vdev replication
|
|
|
|
* constraints.
|
|
|
|
*/
|
|
|
|
(void) nvlist_lookup_uint64(nv, ZPOOL_CONFIG_IS_LOG, &is_log);
|
|
|
|
if (is_log)
|
|
|
|
continue;
|
|
|
|
|
2017-12-04 11:50:35 -08:00
|
|
|
/* Ignore holes introduced by removing aux devices */
|
|
|
|
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
|
|
|
|
if (strcmp(type, VDEV_TYPE_HOLE) == 0)
|
|
|
|
continue;
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) != 0) {
|
|
|
|
/*
|
|
|
|
* This is a 'file' or 'disk' vdev.
|
|
|
|
*/
|
|
|
|
rep.zprl_type = type;
|
|
|
|
rep.zprl_children = 1;
|
|
|
|
rep.zprl_parity = 0;
|
|
|
|
} else {
|
2019-09-22 18:27:53 -04:00
|
|
|
int64_t vdev_size;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* This is a mirror or RAID-Z vdev. Go through and make
|
|
|
|
* sure the contents are all the same (files vs. disks),
|
|
|
|
* keeping track of the number of elements in the
|
|
|
|
* process.
|
|
|
|
*
|
|
|
|
* We also check that the size of each vdev (if it can
|
|
|
|
* be determined) is the same.
|
|
|
|
*/
|
|
|
|
rep.zprl_type = type;
|
|
|
|
rep.zprl_children = 0;
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0 ||
|
|
|
|
strcmp(type, VDEV_TYPE_DRAID) == 0) {
|
2008-11-20 12:01:55 -08:00
|
|
|
verify(nvlist_lookup_uint64(nv,
|
|
|
|
ZPOOL_CONFIG_NPARITY,
|
|
|
|
&rep.zprl_parity) == 0);
|
|
|
|
assert(rep.zprl_parity != 0);
|
|
|
|
} else {
|
|
|
|
rep.zprl_parity = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The 'dontreport' variable indicates that we've
|
|
|
|
* already reported an error for this spec, so don't
|
|
|
|
* bother doing it again.
|
|
|
|
*/
|
|
|
|
type = NULL;
|
|
|
|
dontreport = 0;
|
2019-09-22 18:27:53 -04:00
|
|
|
vdev_size = -1LL;
|
2008-11-20 12:01:55 -08:00
|
|
|
for (c = 0; c < children; c++) {
|
|
|
|
nvlist_t *cnv = child[c];
|
|
|
|
char *path;
|
|
|
|
struct stat64 statbuf;
|
2019-09-22 18:27:53 -04:00
|
|
|
int64_t size = -1LL;
|
2008-11-20 12:01:55 -08:00
|
|
|
char *childtype;
|
|
|
|
int fd, err;
|
|
|
|
|
|
|
|
rep.zprl_children++;
|
|
|
|
|
|
|
|
verify(nvlist_lookup_string(cnv,
|
|
|
|
ZPOOL_CONFIG_TYPE, &childtype) == 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this is a replacing or spare vdev, then
|
2017-12-28 19:15:32 +01:00
|
|
|
* get the real first child of the vdev: do this
|
|
|
|
* in a loop because replacing and spare vdevs
|
|
|
|
* can be nested.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
2017-12-28 19:15:32 +01:00
|
|
|
while (strcmp(childtype,
|
2008-11-20 12:01:55 -08:00
|
|
|
VDEV_TYPE_REPLACING) == 0 ||
|
|
|
|
strcmp(childtype, VDEV_TYPE_SPARE) == 0) {
|
|
|
|
nvlist_t **rchild;
|
|
|
|
uint_t rchildren;
|
|
|
|
|
|
|
|
verify(nvlist_lookup_nvlist_array(cnv,
|
|
|
|
ZPOOL_CONFIG_CHILDREN, &rchild,
|
|
|
|
&rchildren) == 0);
|
|
|
|
assert(rchildren == 2);
|
|
|
|
cnv = rchild[0];
|
|
|
|
|
|
|
|
verify(nvlist_lookup_string(cnv,
|
|
|
|
ZPOOL_CONFIG_TYPE,
|
|
|
|
&childtype) == 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
verify(nvlist_lookup_string(cnv,
|
|
|
|
ZPOOL_CONFIG_PATH, &path) == 0);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have a raidz/mirror that combines disks
|
|
|
|
* with files, report it as an error.
|
|
|
|
*/
|
|
|
|
if (!dontreport && type != NULL &&
|
|
|
|
strcmp(type, childtype) != 0) {
|
|
|
|
if (ret != NULL)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication "
|
|
|
|
"level: %s contains both "
|
|
|
|
"files and devices\n"),
|
|
|
|
rep.zprl_type);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
dontreport = B_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* According to stat(2), the value of 'st_size'
|
|
|
|
* is undefined for block devices and character
|
|
|
|
* devices. But there is no effective way to
|
|
|
|
* determine the real size in userland.
|
|
|
|
*
|
|
|
|
* Instead, we'll take advantage of an
|
|
|
|
* implementation detail of spec_size(). If the
|
|
|
|
* device is currently open, then we (should)
|
|
|
|
* return a valid size.
|
|
|
|
*
|
|
|
|
* If we still don't get a valid size (indicated
|
|
|
|
* by a size of 0 or MAXOFFSET_T), then ignore
|
|
|
|
* this device altogether.
|
|
|
|
*/
|
|
|
|
if ((fd = open(path, O_RDONLY)) >= 0) {
|
Enable additional test cases
Enable additional test cases, in most cases this required a few
minor modifications to the test scripts. In a few cases a real
bug was uncovered and fixed. And in a handful of cases where pools
are layered on pools the test case will be skipped until this is
supported. Details below for each test case.
* zpool_add_004_pos - Skip test on Linux until adding zvols to pools
is fully supported and deadlock free.
* zpool_add_005_pos.ksh - Skip dumpadm portion of the test which isn't
relevant for Linux. The find_vfstab_dev, find_mnttab_dev, and
save_dump_dev functions were updated accordingly for Linux. Add
O_EXCL to the in-use check to prevent the -f (force) option from
working for mounted filesystems and improve the resulting error.
* zpool_add_006_pos - Update test case such that it doesn't depend
on nested pools. Switch to truncate from mkfile to reduce space
requirements and speed up the test case.
* zpool_clear_001_pos - Speed up test case by filling filesystem to
25% capacity.
* zpool_create_002_pos, zpool_create_004_pos - Use sparse files for
file vdevs in order to avoid increasing the partition size.
* zpool_create_006_pos - 6ba1ce9 allows raidz+mirror configs with
similar redundancy. Updating the valid_args and forced_args cases.
* zpool_create_008_pos - Disable overlapping partition portion.
* zpool_create_011_neg - Fix to correctly create the extra partition.
Modified zpool_vdev.c to use fstat64_blk() wrapper which includes
the st_size even for block devices.
* zpool_create_012_neg - Updated to properly find swap devices.
* zpool_create_014_neg, zpool_create_015_neg - Updated to use
swap_setup() and swap_cleanup() wrappers which do the right thing
on Linux and Illumos. Removed '-n' option which succeeds under
Linux due to differences in the in-use checks.
* zpool_create_016_pos.ksh - Skipped test case isn't useful.
* zpool_create_020_pos - Added missing / to cleanup() function.
Remove cache file prior to test to ensure a clean environment
and avoid false positives.
* zpool_destroy_001_pos - Removed test case which creates a pool on
a zvol. This is more likely to deadlock under Linux and has never
been completely supported on any platform.
* zpool_destroy_002_pos - 'zpool destroy -f' is unsupported on Linux.
Mount point must not be busy in order to unmount them.
* zfs_destroy_001_pos - Handle EBUSY error which can occur with
volumes when racing with udev.
* zpool_expand_001_pos, zpool_expand_003_neg - Skip test on Linux
until adding zvols to pools is fully supported and deadlock free.
The test could be modified to use loop-back devices but it would
be preferable to use the test case as is for improved coverage.
* zpool_export_004_pos - Updated test case such that it doesn't
depend on nested pools. Normal file vdevs under /var/tmp are fine.
* zpool_import_all_001_pos - Updated to skip partition 1, which is
known as slice 2, on Illumos. This prevents overwriting the
default TESTPOOL which was causing the failure.
* zpool_import_002_pos, zpool_import_012_pos - No changes needed.
* zpool_remove_003_pos - No changes needed
* zpool_upgrade_002_pos, zpool_upgrade_004_pos - Root cause addressed
by upstream OpenZFS commit 3b7f360.
* zpool_upgrade_007_pos - Disabled in test case due to known failure.
Opened issue https://github.com/zfsonlinux/zfs/issues/6112
* zvol_misc_002_pos - Updated to use ext2.
* zvol_misc_001_neg, zvol_misc_003_neg, zvol_misc_004_pos,
zvol_misc_005_neg, zvol_misc_006_pos - Moved to skip list, these
test cases could be updated to use Linux's crash dump facility.
* zvol_swap_* - Updated to use swap_setup/swap_cleanup helpers.
File creation switched from /tmp to /var/tmp. Enabled minimal
useful tests for Linux, skip test cases which aren't applicable.
Reviewed-by: Giuseppe Di Natale <dinatale2@llnl.gov>
Reviewed-by: loli10K <ezomori.nozomu@gmail.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Issue #3484
Issue #5634
Issue #2437
Issue #5202
Issue #4034
Closes #6095
2017-05-11 14:27:57 -07:00
|
|
|
err = fstat64_blk(fd, &statbuf);
|
2008-11-20 12:01:55 -08:00
|
|
|
(void) close(fd);
|
|
|
|
} else {
|
|
|
|
err = stat64(path, &statbuf);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (err != 0 ||
|
|
|
|
statbuf.st_size == 0 ||
|
|
|
|
statbuf.st_size == MAXOFFSET_T)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
size = statbuf.st_size;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Also make sure that devices and
|
|
|
|
* slices have a consistent size. If
|
|
|
|
* they differ by a significant amount
|
|
|
|
* (~16MB) then report an error.
|
|
|
|
*/
|
|
|
|
if (!dontreport &&
|
2019-09-22 18:27:53 -04:00
|
|
|
(vdev_size != -1LL &&
|
2020-07-31 21:30:31 -07:00
|
|
|
(llabs(size - vdev_size) >
|
2008-11-20 12:01:55 -08:00
|
|
|
ZPOOL_FUZZ))) {
|
|
|
|
if (ret != NULL)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"%s contains devices of "
|
|
|
|
"different sizes\n"),
|
|
|
|
rep.zprl_type);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
dontreport = B_TRUE;
|
|
|
|
}
|
|
|
|
|
|
|
|
type = childtype;
|
|
|
|
vdev_size = size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* At this point, we have the replication of the last toplevel
|
2018-09-05 19:33:36 -06:00
|
|
|
* vdev in 'rep'. Compare it to 'lastrep' to see if it is
|
2008-11-20 12:01:55 -08:00
|
|
|
* different.
|
|
|
|
*/
|
|
|
|
if (lastrep.zprl_type != NULL) {
|
2017-04-06 00:21:13 +02:00
|
|
|
if (is_raidz_mirror(&lastrep, &rep, &raidz, &mirror) ||
|
|
|
|
is_raidz_mirror(&rep, &lastrep, &raidz, &mirror)) {
|
|
|
|
/*
|
|
|
|
* Accepted raidz and mirror when they can
|
|
|
|
* handle the same number of disk failures.
|
|
|
|
*/
|
|
|
|
if (raidz->zprl_parity !=
|
|
|
|
mirror->zprl_children - 1) {
|
|
|
|
if (ret != NULL)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication "
|
|
|
|
"level: "
|
|
|
|
"%s and %s vdevs with "
|
|
|
|
"different redundancy, "
|
|
|
|
"%llu vs. %llu (%llu-way) "
|
|
|
|
"are present\n"),
|
|
|
|
raidz->zprl_type,
|
|
|
|
mirror->zprl_type,
|
|
|
|
raidz->zprl_parity,
|
|
|
|
mirror->zprl_children - 1,
|
|
|
|
mirror->zprl_children);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
}
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
} else if (is_raidz_draid(&lastrep, &rep)) {
|
|
|
|
/*
|
|
|
|
* Accepted raidz and draid when they can
|
|
|
|
* handle the same number of disk failures.
|
|
|
|
*/
|
|
|
|
if (lastrep.zprl_parity != rep.zprl_parity) {
|
|
|
|
if (ret != NULL)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication "
|
|
|
|
"level: %s and %s vdevs "
|
|
|
|
"with different "
|
|
|
|
"redundancy, %llu vs. "
|
|
|
|
"%llu are present\n"),
|
|
|
|
lastrep.zprl_type,
|
|
|
|
rep.zprl_type,
|
|
|
|
lastrep.zprl_parity,
|
|
|
|
rep.zprl_parity);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
}
|
2017-04-06 00:21:13 +02:00
|
|
|
} else if (strcmp(lastrep.zprl_type, rep.zprl_type) !=
|
|
|
|
0) {
|
2008-11-20 12:01:55 -08:00
|
|
|
if (ret != NULL)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: "
|
|
|
|
"both %s and %s vdevs are "
|
|
|
|
"present\n"),
|
|
|
|
lastrep.zprl_type, rep.zprl_type);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
} else if (lastrep.zprl_parity != rep.zprl_parity) {
|
|
|
|
if (ret)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: "
|
|
|
|
"both %llu and %llu device parity "
|
|
|
|
"%s vdevs are present\n"),
|
|
|
|
lastrep.zprl_parity,
|
|
|
|
rep.zprl_parity,
|
|
|
|
rep.zprl_type);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
} else if (lastrep.zprl_children != rep.zprl_children) {
|
|
|
|
if (ret)
|
|
|
|
free(ret);
|
|
|
|
ret = NULL;
|
|
|
|
if (fatal)
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: "
|
|
|
|
"both %llu-way and %llu-way %s "
|
|
|
|
"vdevs are present\n"),
|
|
|
|
lastrep.zprl_children,
|
|
|
|
rep.zprl_children,
|
|
|
|
rep.zprl_type);
|
|
|
|
else
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
lastrep = rep;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret != NULL)
|
|
|
|
*ret = rep;
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check the replication level of the vdev spec against the current pool. Calls
|
|
|
|
* get_replication() to make sure the new spec is self-consistent. If the pool
|
|
|
|
* has a consistent replication level, then we ignore any errors. Otherwise,
|
|
|
|
* report any difference between the two.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
check_replication(nvlist_t *config, nvlist_t *newroot)
|
|
|
|
{
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t children;
|
|
|
|
replication_level_t *current = NULL, *new;
|
2017-06-05 22:53:09 +02:00
|
|
|
replication_level_t *raidz, *mirror;
|
2008-11-20 12:01:55 -08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If we have a current pool configuration, check to see if it's
|
|
|
|
* self-consistent. If not, simply return success.
|
|
|
|
*/
|
|
|
|
if (config != NULL) {
|
|
|
|
nvlist_t *nvroot;
|
|
|
|
|
|
|
|
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
|
|
|
|
&nvroot) == 0);
|
|
|
|
if ((current = get_replication(nvroot, B_FALSE)) == NULL)
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
/*
|
|
|
|
* for spares there may be no children, and therefore no
|
|
|
|
* replication level to check
|
|
|
|
*/
|
|
|
|
if ((nvlist_lookup_nvlist_array(newroot, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) != 0) || (children == 0)) {
|
|
|
|
free(current);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If all we have is logs then there's no replication level to check.
|
|
|
|
*/
|
|
|
|
if (num_logs(newroot) == children) {
|
|
|
|
free(current);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the replication level of the new vdev spec, reporting any
|
|
|
|
* inconsistencies found.
|
|
|
|
*/
|
|
|
|
if ((new = get_replication(newroot, B_TRUE)) == NULL) {
|
|
|
|
free(current);
|
|
|
|
return (-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check to see if the new vdev spec matches the replication level of
|
|
|
|
* the current pool.
|
|
|
|
*/
|
|
|
|
ret = 0;
|
|
|
|
if (current != NULL) {
|
2017-06-05 22:53:09 +02:00
|
|
|
if (is_raidz_mirror(current, new, &raidz, &mirror) ||
|
|
|
|
is_raidz_mirror(new, current, &raidz, &mirror)) {
|
|
|
|
if (raidz->zprl_parity != mirror->zprl_children - 1) {
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: pool and "
|
|
|
|
"new vdev with different redundancy, %s "
|
|
|
|
"and %s vdevs, %llu vs. %llu (%llu-way)\n"),
|
|
|
|
raidz->zprl_type,
|
|
|
|
mirror->zprl_type,
|
|
|
|
raidz->zprl_parity,
|
|
|
|
mirror->zprl_children - 1,
|
|
|
|
mirror->zprl_children);
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
} else if (strcmp(current->zprl_type, new->zprl_type) != 0) {
|
2008-11-20 12:01:55 -08:00
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: pool uses %s "
|
|
|
|
"and new vdev is %s\n"),
|
|
|
|
current->zprl_type, new->zprl_type);
|
|
|
|
ret = -1;
|
|
|
|
} else if (current->zprl_parity != new->zprl_parity) {
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: pool uses %llu "
|
|
|
|
"device parity and new vdev uses %llu\n"),
|
|
|
|
current->zprl_parity, new->zprl_parity);
|
|
|
|
ret = -1;
|
|
|
|
} else if (current->zprl_children != new->zprl_children) {
|
|
|
|
vdev_error(gettext(
|
|
|
|
"mismatched replication level: pool uses %llu-way "
|
|
|
|
"%s and new vdev uses %llu-way %s\n"),
|
|
|
|
current->zprl_children, current->zprl_type,
|
|
|
|
new->zprl_children, new->zprl_type);
|
|
|
|
ret = -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
free(new);
|
|
|
|
if (current != NULL)
|
|
|
|
free(current);
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2010-08-26 11:56:53 -07:00
|
|
|
/*
 * Overwrite the first 4096 bytes of 'path' with zeros.
 *
 * make_disks() uses this on partitions so that a magic value left behind by
 * a previous filesystem cannot cause libblkid to misidentify the partition.
 *
 * The device is opened write-only with O_EXCL, the zeroed buffer is written
 * once, and the data is flushed with fdatasync() before the descriptor is
 * closed.  A diagnostic is printed to stderr on any failure.
 *
 * Returns 0 when all 4096 bytes were written, -1 if the device could not be
 * opened, the write failed, or the write was short.
 */
static int
zero_label(char *path)
{
	/* A true compile-time constant, so 'buf' is not a VLA. */
	enum { ZERO_LABEL_SIZE = 4096 };
	char buf[ZERO_LABEL_SIZE] = { 0 };
	ssize_t nwritten;
	int saved_errno;
	int fd;

	if ((fd = open(path, O_WRONLY|O_EXCL)) < 0) {
		saved_errno = errno;
		(void) fprintf(stderr, gettext("cannot open '%s': %s\n"),
		    path, strerror(saved_errno));
		return (-1);
	}

	nwritten = write(fd, buf, sizeof (buf));
	/*
	 * Capture errno now: fdatasync() and close() below may overwrite it
	 * before the write failure is reported.
	 */
	saved_errno = errno;
	(void) fdatasync(fd);
	(void) close(fd);

	if (nwritten == -1) {
		(void) fprintf(stderr, gettext("cannot zero first %d bytes "
		    "of '%s': %s\n"), ZERO_LABEL_SIZE, path,
		    strerror(saved_errno));
		return (-1);
	}

	if (nwritten != ZERO_LABEL_SIZE) {
		(void) fprintf(stderr, gettext("could only zero %d/%d bytes "
		    "of '%s'\n"), (int)nwritten, ZERO_LABEL_SIZE, path);
		return (-1);
	}

	return (0);
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* Go through and find any whole disks in the vdev specification, labelling them
|
|
|
|
* as appropriate. When constructing the vdev spec, we were unable to open this
|
|
|
|
* device in order to provide a devid. Now that we have labelled the disk and
|
|
|
|
* know that slice 0 is valid, we can construct the devid now.
|
|
|
|
*
|
|
|
|
* If the disk was already labeled with an EFI label, we will have gotten the
|
|
|
|
* devid already (because we were able to open the whole disk). Otherwise, we
|
|
|
|
* need to get the devid after we label the disk.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
make_disks(zpool_handle_t *zhp, nvlist_t *nv)
|
|
|
|
{
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t c, children;
|
2013-02-26 17:02:27 -08:00
|
|
|
char *type, *path;
|
2010-10-21 17:08:30 -07:00
|
|
|
char devpath[MAXPATHLEN];
|
|
|
|
char udevpath[MAXPATHLEN];
|
2008-11-20 12:01:55 -08:00
|
|
|
uint64_t wholedisk;
|
2010-10-21 17:08:30 -07:00
|
|
|
struct stat64 statbuf;
|
2013-02-26 17:02:27 -08:00
|
|
|
int is_exclusive = 0;
|
|
|
|
int fd;
|
2008-11-20 12:01:55 -08:00
|
|
|
int ret;
|
|
|
|
|
|
|
|
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
|
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) != 0) {
|
|
|
|
|
|
|
|
if (strcmp(type, VDEV_TYPE_DISK) != 0)
|
|
|
|
return (0);
|
|
|
|
|
|
|
|
/*
|
2010-08-26 11:56:53 -07:00
|
|
|
* We have a disk device. If this is a whole disk write
|
|
|
|
* out the efi partition table, otherwise write zero's to
|
|
|
|
* the first 4k of the partition. This is to ensure that
|
|
|
|
* libblkid will not misidentify the partition due to a
|
|
|
|
* magic value left by the previous filesystem.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
2010-08-26 11:56:53 -07:00
|
|
|
verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
|
|
|
|
verify(!nvlist_lookup_uint64(nv, ZPOOL_CONFIG_WHOLE_DISK,
|
|
|
|
&wholedisk));
|
|
|
|
|
|
|
|
if (!wholedisk) {
|
2016-03-14 10:04:21 -06:00
|
|
|
/*
|
|
|
|
* Update device id string for mpath nodes (Linux only)
|
|
|
|
*/
|
|
|
|
if (is_mpath_whole_disk(path))
|
|
|
|
update_vdev_config_dev_strs(nv);
|
|
|
|
|
2017-07-24 12:49:27 -07:00
|
|
|
if (!is_spare(NULL, path))
|
|
|
|
(void) zero_label(path);
|
2013-02-26 17:02:27 -08:00
|
|
|
return (0);
|
2010-08-26 11:56:53 -07:00
|
|
|
}
|
|
|
|
|
2010-10-21 17:08:30 -07:00
|
|
|
if (realpath(path, devpath) == NULL) {
|
2010-08-26 11:56:53 -07:00
|
|
|
ret = errno;
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("cannot resolve path '%s'\n"), path);
|
|
|
|
return (ret);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
|
2010-10-21 17:08:30 -07:00
|
|
|
/*
|
|
|
|
* Remove any previously existing symlink from a udev path to
|
2016-04-19 11:19:12 -07:00
|
|
|
* the device before labeling the disk. This ensures that
|
|
|
|
* only newly created links are used. Otherwise there is a
|
|
|
|
* window between when udev deletes and recreates the link
|
|
|
|
* during which access attempts will fail with ENOENT.
|
2010-10-21 17:08:30 -07:00
|
|
|
*/
|
2016-09-23 06:55:41 +08:00
|
|
|
strlcpy(udevpath, path, MAXPATHLEN);
|
2012-10-17 16:58:54 -07:00
|
|
|
(void) zfs_append_partition(udevpath, MAXPATHLEN);
|
|
|
|
|
2013-02-26 17:02:27 -08:00
|
|
|
fd = open(devpath, O_RDWR|O_EXCL);
|
|
|
|
if (fd == -1) {
|
|
|
|
if (errno == EBUSY)
|
|
|
|
is_exclusive = 1;
|
2020-04-14 11:36:28 -07:00
|
|
|
#ifdef __FreeBSD__
|
|
|
|
if (errno == EPERM)
|
|
|
|
is_exclusive = 1;
|
|
|
|
#endif
|
2013-02-26 17:02:27 -08:00
|
|
|
} else {
|
|
|
|
(void) close(fd);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
/*
|
2013-02-26 17:02:27 -08:00
|
|
|
* If the partition exists, contains a valid spare label,
|
|
|
|
* and is opened exclusively there is no need to partition
|
|
|
|
* it. Hot spares have already been partitioned and are
|
|
|
|
* held open exclusively by the kernel as a safety measure.
|
|
|
|
*
|
|
|
|
* If the provided path is for a /dev/disk/ device its
|
|
|
|
* symbolic link will be removed, partition table created,
|
|
|
|
* and then block until udev creates the new link.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
2018-11-20 19:22:53 +01:00
|
|
|
if (!is_exclusive && !is_spare(NULL, udevpath)) {
|
2016-04-19 11:19:12 -07:00
|
|
|
char *devnode = strrchr(devpath, '/') + 1;
|
|
|
|
|
2013-11-01 20:26:11 +01:00
|
|
|
ret = strncmp(udevpath, UDISK_ROOT, strlen(UDISK_ROOT));
|
2013-02-26 17:02:27 -08:00
|
|
|
if (ret == 0) {
|
|
|
|
ret = lstat64(udevpath, &statbuf);
|
|
|
|
if (ret == 0 && S_ISLNK(statbuf.st_mode))
|
|
|
|
(void) unlink(udevpath);
|
|
|
|
}
|
|
|
|
|
2016-04-19 11:19:12 -07:00
|
|
|
/*
|
|
|
|
* When labeling a pool the raw device node name
|
|
|
|
* is provided as it appears under /dev/.
|
|
|
|
*/
|
|
|
|
if (zpool_label_disk(g_zfs, zhp, devnode) == -1)
|
2013-02-26 17:02:27 -08:00
|
|
|
return (-1);
|
|
|
|
|
2016-04-19 11:19:12 -07:00
|
|
|
/*
|
|
|
|
* Wait for udev to signal the device is available
|
|
|
|
* by the provided path.
|
|
|
|
*/
|
2013-10-11 14:24:18 -07:00
|
|
|
ret = zpool_label_disk_wait(udevpath, DISK_LABEL_WAIT);
|
2013-02-26 17:02:27 -08:00
|
|
|
if (ret) {
|
2016-04-19 11:19:12 -07:00
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("missing link: %s was "
|
|
|
|
"partitioned but %s is missing\n"),
|
|
|
|
devnode, udevpath);
|
|
|
|
return (ret);
|
2013-02-26 17:02:27 -08:00
|
|
|
}
|
|
|
|
|
2016-04-19 11:19:12 -07:00
|
|
|
ret = zero_label(udevpath);
|
|
|
|
if (ret)
|
|
|
|
return (ret);
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2012-10-17 16:58:54 -07:00
|
|
|
* Update the path to refer to the partition. The presence of
|
2008-11-20 12:01:55 -08:00
|
|
|
* the 'whole_disk' field indicates to the CLI that we should
|
2012-10-17 16:58:54 -07:00
|
|
|
* chop off the partition number when displaying the device in
|
2008-11-20 12:01:55 -08:00
|
|
|
* future output.
|
|
|
|
*/
|
2010-10-21 17:08:30 -07:00
|
|
|
verify(nvlist_add_string(nv, ZPOOL_CONFIG_PATH, udevpath) == 0);
|
2008-11-20 12:01:55 -08:00
|
|
|
|
2016-03-14 10:04:21 -06:00
|
|
|
/*
|
|
|
|
* Update device id strings for whole disks (Linux only)
|
|
|
|
*/
|
2016-04-19 11:19:12 -07:00
|
|
|
update_vdev_config_dev_strs(nv);
|
2016-03-14 10:04:21 -06:00
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
if ((ret = make_disks(zhp, child[c])) != 0)
|
|
|
|
return (ret);
|
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
|
|
|
|
&child, &children) == 0)
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
if ((ret = make_disks(zhp, child[c])) != 0)
|
|
|
|
return (ret);
|
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
|
|
|
|
&child, &children) == 0)
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
if ((ret = make_disks(zhp, child[c])) != 0)
|
|
|
|
return (ret);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Go through and find any devices that are in use. We rely on libdiskmgt for
|
|
|
|
* the majority of this task.
|
|
|
|
*/
|
2015-07-05 21:17:49 +02:00
|
|
|
static boolean_t
|
|
|
|
is_device_in_use(nvlist_t *config, nvlist_t *nv, boolean_t force,
|
2010-05-28 13:45:14 -07:00
|
|
|
boolean_t replacing, boolean_t isspare)
|
2008-11-20 12:01:55 -08:00
|
|
|
{
|
|
|
|
nvlist_t **child;
|
|
|
|
uint_t c, children;
|
|
|
|
char *type, *path;
|
2010-08-26 11:56:53 -07:00
|
|
|
int ret = 0;
|
2008-11-20 12:01:55 -08:00
|
|
|
char buf[MAXPATHLEN];
|
2010-08-26 11:56:53 -07:00
|
|
|
uint64_t wholedisk = B_FALSE;
|
2015-07-05 21:17:49 +02:00
|
|
|
boolean_t anyinuse = B_FALSE;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
verify(nvlist_lookup_string(nv, ZPOOL_CONFIG_TYPE, &type) == 0);
|
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&child, &children) != 0) {
|
|
|
|
|
2010-08-26 11:56:53 -07:00
|
|
|
verify(!nvlist_lookup_string(nv, ZPOOL_CONFIG_PATH, &path));
|
|
|
|
if (strcmp(type, VDEV_TYPE_DISK) == 0)
|
|
|
|
verify(!nvlist_lookup_uint64(nv,
|
2013-11-01 20:26:11 +01:00
|
|
|
ZPOOL_CONFIG_WHOLE_DISK, &wholedisk));
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
/*
|
|
|
|
* As a generic check, we look to see if this is a replace of a
|
|
|
|
* hot spare within the same pool. If so, we allow it
|
2010-08-26 11:56:53 -07:00
|
|
|
* regardless of what libblkid or zpool_in_use() says.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
2010-05-28 13:45:14 -07:00
|
|
|
if (replacing) {
|
2013-02-26 17:02:27 -08:00
|
|
|
(void) strlcpy(buf, path, sizeof (buf));
|
|
|
|
if (wholedisk) {
|
|
|
|
ret = zfs_append_partition(buf, sizeof (buf));
|
|
|
|
if (ret == -1)
|
|
|
|
return (-1);
|
|
|
|
}
|
2010-05-28 13:45:14 -07:00
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
if (is_spare(config, buf))
|
2015-07-05 21:17:49 +02:00
|
|
|
return (B_FALSE);
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(type, VDEV_TYPE_DISK) == 0)
|
2010-08-26 11:56:53 -07:00
|
|
|
ret = check_device(path, force, isspare, wholedisk);
|
2008-11-20 12:01:55 -08:00
|
|
|
|
2015-07-05 21:17:49 +02:00
|
|
|
else if (strcmp(type, VDEV_TYPE_FILE) == 0)
|
2008-11-20 12:01:55 -08:00
|
|
|
ret = check_file(path, force, isspare);
|
|
|
|
|
2015-07-05 21:17:49 +02:00
|
|
|
return (ret != 0);
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++)
|
2015-07-05 21:17:49 +02:00
|
|
|
if (is_device_in_use(config, child[c], force, replacing,
|
|
|
|
B_FALSE))
|
|
|
|
anyinuse = B_TRUE;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_SPARES,
|
|
|
|
&child, &children) == 0)
|
|
|
|
for (c = 0; c < children; c++)
|
2015-07-05 21:17:49 +02:00
|
|
|
if (is_device_in_use(config, child[c], force, replacing,
|
|
|
|
B_TRUE))
|
|
|
|
anyinuse = B_TRUE;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
if (nvlist_lookup_nvlist_array(nv, ZPOOL_CONFIG_L2CACHE,
|
|
|
|
&child, &children) == 0)
|
|
|
|
for (c = 0; c < children; c++)
|
2015-07-05 21:17:49 +02:00
|
|
|
if (is_device_in_use(config, child[c], force, replacing,
|
|
|
|
B_FALSE))
|
|
|
|
anyinuse = B_TRUE;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
2015-07-05 21:17:49 +02:00
|
|
|
return (anyinuse);
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/*
|
|
|
|
* Returns the parity level extracted from a raidz or draid type.
|
|
|
|
* If the parity cannot be determined zero is returned.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
get_parity(const char *type)
|
2008-11-20 12:01:55 -08:00
|
|
|
{
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
long parity = 0;
|
|
|
|
const char *p;
|
|
|
|
|
|
|
|
if (strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0) {
|
|
|
|
p = type + strlen(VDEV_TYPE_RAIDZ);
|
2009-08-18 11:43:27 -07:00
|
|
|
|
|
|
|
if (*p == '\0') {
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/* when unspecified default to single parity */
|
|
|
|
return (1);
|
2009-08-18 11:43:27 -07:00
|
|
|
} else if (*p == '0') {
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/* no zero prefixes allowed */
|
|
|
|
return (0);
|
2009-08-18 11:43:27 -07:00
|
|
|
} else {
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/* 0-3, no suffixes allowed */
|
|
|
|
char *end;
|
2009-08-18 11:43:27 -07:00
|
|
|
errno = 0;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
parity = strtol(p, &end, 10);
|
|
|
|
if (errno != 0 || *end != '\0' ||
|
|
|
|
parity < 1 || parity > VDEV_RAIDZ_MAXPARITY) {
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else if (strncmp(type, VDEV_TYPE_DRAID,
|
|
|
|
strlen(VDEV_TYPE_DRAID)) == 0) {
|
|
|
|
p = type + strlen(VDEV_TYPE_DRAID);
|
|
|
|
|
|
|
|
if (*p == '\0' || *p == ':') {
|
|
|
|
/* when unspecified default to single parity */
|
|
|
|
return (1);
|
|
|
|
} else if (*p == '0') {
|
|
|
|
/* no zero prefixes allowed */
|
|
|
|
return (0);
|
|
|
|
} else {
|
|
|
|
/* 0-3, allowed suffixes: '\0' or ':' */
|
|
|
|
char *end;
|
|
|
|
errno = 0;
|
|
|
|
parity = strtol(p, &end, 10);
|
|
|
|
if (errno != 0 ||
|
|
|
|
parity < 1 || parity > VDEV_DRAID_MAXPARITY ||
|
|
|
|
(*end != '\0' && *end != ':')) {
|
|
|
|
return (0);
|
|
|
|
}
|
2009-08-18 11:43:27 -07:00
|
|
|
}
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
return ((int)parity);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Assign the minimum and maximum number of devices allowed for
|
|
|
|
* the specified type. On error NULL is returned, otherwise the
|
|
|
|
* type prefix is returned (raidz, mirror, etc).
|
|
|
|
*/
|
|
|
|
static const char *
|
|
|
|
is_grouping(const char *type, int *mindev, int *maxdev)
|
|
|
|
{
|
|
|
|
int nparity;
|
|
|
|
|
|
|
|
if (strncmp(type, VDEV_TYPE_RAIDZ, strlen(VDEV_TYPE_RAIDZ)) == 0 ||
|
|
|
|
strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) == 0) {
|
|
|
|
nparity = get_parity(type);
|
|
|
|
if (nparity == 0)
|
|
|
|
return (NULL);
|
2008-11-20 12:01:55 -08:00
|
|
|
if (mindev != NULL)
|
2009-08-18 11:43:27 -07:00
|
|
|
*mindev = nparity + 1;
|
|
|
|
if (maxdev != NULL)
|
|
|
|
*maxdev = 255;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leverages
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
|
|
|
|
if (strncmp(type, VDEV_TYPE_RAIDZ,
|
|
|
|
strlen(VDEV_TYPE_RAIDZ)) == 0) {
|
|
|
|
return (VDEV_TYPE_RAIDZ);
|
|
|
|
} else {
|
|
|
|
return (VDEV_TYPE_DRAID);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
2009-08-18 11:43:27 -07:00
|
|
|
if (maxdev != NULL)
|
|
|
|
*maxdev = INT_MAX;
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
if (strcmp(type, "mirror") == 0) {
|
|
|
|
if (mindev != NULL)
|
|
|
|
*mindev = 2;
|
|
|
|
return (VDEV_TYPE_MIRROR);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(type, "spare") == 0) {
|
|
|
|
if (mindev != NULL)
|
|
|
|
*mindev = 1;
|
|
|
|
return (VDEV_TYPE_SPARE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(type, "log") == 0) {
|
|
|
|
if (mindev != NULL)
|
|
|
|
*mindev = 1;
|
|
|
|
return (VDEV_TYPE_LOG);
|
|
|
|
}
|
|
|
|
|
2018-09-05 19:33:36 -06:00
|
|
|
if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0 ||
|
|
|
|
strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
|
|
|
|
if (mindev != NULL)
|
|
|
|
*mindev = 1;
|
|
|
|
return (type);
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
if (strcmp(type, "cache") == 0) {
|
|
|
|
if (mindev != NULL)
|
|
|
|
*mindev = 1;
|
|
|
|
return (VDEV_TYPE_L2CACHE);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
/*
|
|
|
|
* Extract the configuration parameters encoded in the dRAID type and
|
|
|
|
* use them to generate a dRAID configuration. The expected format is:
|
|
|
|
*
|
|
|
|
* draid[<parity>][:<data><d|D>][:<children><c|C>][:<spares><s|S>]
|
|
|
|
*
|
|
|
|
* The intent is to be able to generate a good configuration when no
|
|
|
|
* additional information is provided. The only mandatory component
|
|
|
|
* of the 'type' is the 'draid' prefix. If a value is not provided
|
|
|
|
* then reasonable defaults are used. The optional components may
|
|
|
|
* appear in any order but the d/s/c suffix is required.
|
|
|
|
*
|
|
|
|
* Valid inputs:
|
|
|
|
* - data: number of data devices per group (1-255)
|
|
|
|
* - parity: number of parity blocks per group (1-3)
|
|
|
|
* - spares: number of distributed spare (0-100)
|
|
|
|
* - children: total number of devices (1-255)
|
|
|
|
*
|
|
|
|
* Examples:
|
|
|
|
* - zpool create tank draid <devices...>
|
|
|
|
* - zpool create tank draid2:8d:51c:2s <devices...>
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
draid_config_by_type(nvlist_t *nv, const char *type, uint64_t children)
|
|
|
|
{
|
|
|
|
uint64_t nparity = 1;
|
|
|
|
uint64_t nspares = 0;
|
|
|
|
uint64_t ndata = UINT64_MAX;
|
|
|
|
uint64_t ngroups = 1;
|
|
|
|
long value;
|
|
|
|
|
|
|
|
if (strncmp(type, VDEV_TYPE_DRAID, strlen(VDEV_TYPE_DRAID)) != 0)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
nparity = (uint64_t)get_parity(type);
|
|
|
|
if (nparity == 0)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
char *p = (char *)type;
|
|
|
|
while ((p = strchr(p, ':')) != NULL) {
|
|
|
|
char *end;
|
|
|
|
|
|
|
|
p = p + 1;
|
|
|
|
errno = 0;
|
|
|
|
|
|
|
|
if (!isdigit(p[0])) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid dRAID "
|
|
|
|
"syntax; expected [:<number><c|d|s>] not '%s'\n"),
|
|
|
|
type);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Expected non-zero value with c/d/s suffix */
|
|
|
|
value = strtol(p, &end, 10);
|
|
|
|
char suffix = tolower(*end);
|
|
|
|
if (errno != 0 ||
|
|
|
|
(suffix != 'c' && suffix != 'd' && suffix != 's')) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid dRAID "
|
|
|
|
"syntax; expected [:<number><c|d|s>] not '%s'\n"),
|
|
|
|
type);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (suffix == 'c') {
|
|
|
|
if ((uint64_t)value != children) {
|
|
|
|
fprintf(stderr,
|
|
|
|
gettext("invalid number of dRAID children; "
|
|
|
|
"%llu required but %llu provided\n"),
|
|
|
|
(u_longlong_t)value,
|
|
|
|
(u_longlong_t)children);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
} else if (suffix == 'd') {
|
|
|
|
ndata = (uint64_t)value;
|
|
|
|
} else if (suffix == 's') {
|
|
|
|
nspares = (uint64_t)value;
|
|
|
|
} else {
|
|
|
|
verify(0); /* Unreachable */
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* When a specific number of data disks is not provided limit a
|
|
|
|
* redundancy group to 8 data disks. This value was selected to
|
|
|
|
* provide a reasonable tradeoff between capacity and performance.
|
|
|
|
*/
|
|
|
|
if (ndata == UINT64_MAX) {
|
|
|
|
if (children > nspares + nparity) {
|
|
|
|
ndata = MIN(children - nspares - nparity, 8);
|
|
|
|
} else {
|
|
|
|
fprintf(stderr, gettext("request number of "
|
|
|
|
"distributed spares %llu and parity level %llu\n"
|
|
|
|
"leaves no disks available for data\n"),
|
|
|
|
(u_longlong_t)nspares, (u_longlong_t)nparity);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Verify the maximum allowed group size is never exceeded. */
|
|
|
|
if (ndata == 0 || (ndata + nparity > children - nspares)) {
|
|
|
|
fprintf(stderr, gettext("requested number of dRAID data "
|
|
|
|
"disks per group %llu is too high,\nat most %llu disks "
|
|
|
|
"are available for data\n"), (u_longlong_t)ndata,
|
|
|
|
(u_longlong_t)(children - nspares - nparity));
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (nparity == 0 || nparity > VDEV_DRAID_MAXPARITY) {
|
|
|
|
fprintf(stderr,
|
|
|
|
gettext("invalid dRAID parity level %llu; must be "
|
|
|
|
"between 1 and %d\n"), (u_longlong_t)nparity,
|
|
|
|
VDEV_DRAID_MAXPARITY);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Verify the requested number of spares can be satisfied.
|
|
|
|
* An arbitrary limit of 100 distributed spares is applied.
|
|
|
|
*/
|
|
|
|
if (nspares > 100 || nspares > (children - (ndata + nparity))) {
|
|
|
|
fprintf(stderr,
|
|
|
|
gettext("invalid number of dRAID spares %llu; additional "
|
|
|
|
"disks would be required\n"), (u_longlong_t)nspares);
|
|
|
|
return (EINVAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Verify the requested number children is sufficient. */
|
|
|
|
if (children < (ndata + nparity + nspares)) {
|
|
|
|
fprintf(stderr, gettext("%llu disks were provided, but at "
|
|
|
|
"least %llu disks are required for this config\n"),
|
|
|
|
(u_longlong_t)children,
|
|
|
|
(u_longlong_t)(ndata + nparity + nspares));
|
|
|
|
}
|
|
|
|
|
|
|
|
if (children > VDEV_DRAID_MAX_CHILDREN) {
|
|
|
|
fprintf(stderr, gettext("%llu disks were provided, but "
|
|
|
|
"dRAID only supports up to %u disks"),
|
|
|
|
(u_longlong_t)children, VDEV_DRAID_MAX_CHILDREN);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Calculate the minimum number of groups required to fill a slice.
|
|
|
|
* This is the LCM of the stripe width (ndata + nparity) and the
|
|
|
|
* number of data drives (children - nspares).
|
|
|
|
*/
|
|
|
|
while (ngroups * (ndata + nparity) % (children - nspares) != 0)
|
|
|
|
ngroups++;
|
|
|
|
|
|
|
|
/* Store the basic dRAID configuration. */
|
|
|
|
fnvlist_add_uint64(nv, ZPOOL_CONFIG_NPARITY, nparity);
|
|
|
|
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NDATA, ndata);
|
|
|
|
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NSPARES, nspares);
|
|
|
|
fnvlist_add_uint64(nv, ZPOOL_CONFIG_DRAID_NGROUPS, ngroups);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* Construct a syntactically valid vdev specification,
|
|
|
|
* and ensure that all devices and files exist and can be opened.
|
|
|
|
* Note: we don't bother freeing anything in the error paths
|
|
|
|
* because the program is just going to exit anyway.
|
|
|
|
*/
|
2020-06-15 14:30:37 -04:00
|
|
|
static nvlist_t *
|
2011-06-16 21:56:38 +02:00
|
|
|
construct_spec(nvlist_t *props, int argc, char **argv)
|
2008-11-20 12:01:55 -08:00
|
|
|
{
|
|
|
|
nvlist_t *nvroot, *nv, **top, **spares, **l2cache;
|
2009-08-18 11:43:27 -07:00
|
|
|
int t, toplevels, mindev, maxdev, nspares, nlogs, nl2cache;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
const char *type, *fulltype;
|
|
|
|
boolean_t is_log, is_special, is_dedup, is_spare;
|
2008-11-20 12:01:55 -08:00
|
|
|
boolean_t seen_logs;
|
|
|
|
|
|
|
|
top = NULL;
|
|
|
|
toplevels = 0;
|
|
|
|
spares = NULL;
|
|
|
|
l2cache = NULL;
|
|
|
|
nspares = 0;
|
|
|
|
nlogs = 0;
|
|
|
|
nl2cache = 0;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
is_log = is_special = is_dedup = is_spare = B_FALSE;
|
2008-11-20 12:01:55 -08:00
|
|
|
seen_logs = B_FALSE;
|
2016-09-30 03:11:44 +08:00
|
|
|
nvroot = NULL;
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
while (argc > 0) {
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
fulltype = argv[0];
|
2008-11-20 12:01:55 -08:00
|
|
|
nv = NULL;
|
|
|
|
|
|
|
|
/*
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
* If it's a mirror, raidz, or draid the subsequent arguments
|
|
|
|
* are its leaves -- until we encounter the next mirror,
|
|
|
|
* raidz or draid.
|
2008-11-20 12:01:55 -08:00
|
|
|
*/
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if ((type = is_grouping(fulltype, &mindev, &maxdev)) != NULL) {
|
2008-11-20 12:01:55 -08:00
|
|
|
nvlist_t **child = NULL;
|
|
|
|
int c, children = 0;
|
|
|
|
|
|
|
|
if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
|
|
|
|
if (spares != NULL) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("invalid vdev "
|
|
|
|
"specification: 'spare' can be "
|
|
|
|
"specified only once\n"));
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
is_spare = B_TRUE;
|
2018-09-05 19:33:36 -06:00
|
|
|
is_log = is_special = is_dedup = B_FALSE;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(type, VDEV_TYPE_LOG) == 0) {
|
|
|
|
if (seen_logs) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("invalid vdev "
|
|
|
|
"specification: 'log' can be "
|
|
|
|
"specified only once\n"));
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
seen_logs = B_TRUE;
|
|
|
|
is_log = B_TRUE;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
is_special = is_dedup = is_spare = B_FALSE;
|
2008-11-20 12:01:55 -08:00
|
|
|
argc--;
|
|
|
|
argv++;
|
|
|
|
/*
|
|
|
|
* A log is not a real grouping device.
|
|
|
|
* We just set is_log and continue.
|
|
|
|
*/
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2018-09-05 19:33:36 -06:00
|
|
|
if (strcmp(type, VDEV_ALLOC_BIAS_SPECIAL) == 0) {
|
|
|
|
is_special = B_TRUE;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
is_log = is_dedup = is_spare = B_FALSE;
|
2018-09-05 19:33:36 -06:00
|
|
|
argc--;
|
|
|
|
argv++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(type, VDEV_ALLOC_BIAS_DEDUP) == 0) {
|
|
|
|
is_dedup = B_TRUE;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
is_log = is_special = is_spare = B_FALSE;
|
2018-09-05 19:33:36 -06:00
|
|
|
argc--;
|
|
|
|
argv++;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
|
|
|
|
if (l2cache != NULL) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("invalid vdev "
|
|
|
|
"specification: 'cache' can be "
|
|
|
|
"specified only once\n"));
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
is_log = is_special = B_FALSE;
|
|
|
|
is_dedup = is_spare = B_FALSE;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
2018-09-05 19:33:36 -06:00
|
|
|
if (is_log || is_special || is_dedup) {
|
2008-11-20 12:01:55 -08:00
|
|
|
if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
|
|
|
|
(void) fprintf(stderr,
|
|
|
|
gettext("invalid vdev "
|
2018-09-05 19:33:36 -06:00
|
|
|
"specification: unsupported '%s' "
|
|
|
|
"device: %s\n"), is_log ? "log" :
|
|
|
|
"special", type);
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
nlogs++;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (c = 1; c < argc; c++) {
|
2009-08-18 11:43:27 -07:00
|
|
|
if (is_grouping(argv[c], NULL, NULL) != NULL)
|
2008-11-20 12:01:55 -08:00
|
|
|
break;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
children++;
|
|
|
|
child = realloc(child,
|
|
|
|
children * sizeof (nvlist_t *));
|
|
|
|
if (child == NULL)
|
|
|
|
zpool_no_memory();
|
2013-11-01 20:26:11 +01:00
|
|
|
if ((nv = make_leaf_vdev(props, argv[c],
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
!(is_log || is_special || is_dedup ||
|
|
|
|
is_spare))) == NULL) {
|
2016-07-19 18:24:24 +02:00
|
|
|
for (c = 0; c < children - 1; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2016-07-19 18:24:24 +02:00
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
child[children - 1] = nv;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (children < mindev) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid vdev "
|
|
|
|
"specification: %s requires at least %d "
|
|
|
|
"devices\n"), argv[0], mindev);
|
2016-07-19 18:24:24 +02:00
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
2009-08-18 11:43:27 -07:00
|
|
|
if (children > maxdev) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid vdev "
|
|
|
|
"specification: %s supports no more than "
|
|
|
|
"%d devices\n"), argv[0], maxdev);
|
2016-07-19 18:24:24 +02:00
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2009-08-18 11:43:27 -07:00
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
argc -= c;
|
|
|
|
argv += c;
|
|
|
|
|
|
|
|
if (strcmp(type, VDEV_TYPE_SPARE) == 0) {
|
|
|
|
spares = child;
|
|
|
|
nspares = children;
|
|
|
|
continue;
|
|
|
|
} else if (strcmp(type, VDEV_TYPE_L2CACHE) == 0) {
|
|
|
|
l2cache = child;
|
|
|
|
nl2cache = children;
|
|
|
|
continue;
|
|
|
|
} else {
|
2018-09-05 19:33:36 -06:00
|
|
|
/* create a top-level vdev with children */
|
2008-11-20 12:01:55 -08:00
|
|
|
verify(nvlist_alloc(&nv, NV_UNIQUE_NAME,
|
|
|
|
0) == 0);
|
|
|
|
verify(nvlist_add_string(nv, ZPOOL_CONFIG_TYPE,
|
|
|
|
type) == 0);
|
|
|
|
verify(nvlist_add_uint64(nv,
|
|
|
|
ZPOOL_CONFIG_IS_LOG, is_log) == 0);
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if (is_log) {
|
2018-09-05 19:33:36 -06:00
|
|
|
verify(nvlist_add_string(nv,
|
|
|
|
ZPOOL_CONFIG_ALLOCATION_BIAS,
|
|
|
|
VDEV_ALLOC_BIAS_LOG) == 0);
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
}
|
2018-09-05 19:33:36 -06:00
|
|
|
if (is_special) {
|
|
|
|
verify(nvlist_add_string(nv,
|
|
|
|
ZPOOL_CONFIG_ALLOCATION_BIAS,
|
|
|
|
VDEV_ALLOC_BIAS_SPECIAL) == 0);
|
|
|
|
}
|
|
|
|
if (is_dedup) {
|
|
|
|
verify(nvlist_add_string(nv,
|
|
|
|
ZPOOL_CONFIG_ALLOCATION_BIAS,
|
|
|
|
VDEV_ALLOC_BIAS_DEDUP) == 0);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
if (strcmp(type, VDEV_TYPE_RAIDZ) == 0) {
|
|
|
|
verify(nvlist_add_uint64(nv,
|
|
|
|
ZPOOL_CONFIG_NPARITY,
|
|
|
|
mindev - 1) == 0);
|
|
|
|
}
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if (strcmp(type, VDEV_TYPE_DRAID) == 0) {
|
|
|
|
if (draid_config_by_type(nv,
|
|
|
|
fulltype, children) != 0) {
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
|
|
|
goto spec_out;
|
|
|
|
}
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
verify(nvlist_add_nvlist_array(nv,
|
|
|
|
ZPOOL_CONFIG_CHILDREN, child,
|
|
|
|
children) == 0);
|
|
|
|
|
|
|
|
for (c = 0; c < children; c++)
|
|
|
|
nvlist_free(child[c]);
|
|
|
|
free(child);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We have a device. Pass off to make_leaf_vdev() to
|
|
|
|
* construct the appropriate nvlist describing the vdev.
|
|
|
|
*/
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
if ((nv = make_leaf_vdev(props, argv[0], !(is_log ||
|
|
|
|
is_special || is_dedup || is_spare))) == NULL)
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
|
|
|
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
verify(nvlist_add_uint64(nv,
|
|
|
|
ZPOOL_CONFIG_IS_LOG, is_log) == 0);
|
|
|
|
if (is_log) {
|
|
|
|
verify(nvlist_add_string(nv,
|
|
|
|
ZPOOL_CONFIG_ALLOCATION_BIAS,
|
|
|
|
VDEV_ALLOC_BIAS_LOG) == 0);
|
2008-11-20 12:01:55 -08:00
|
|
|
nlogs++;
|
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID. This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to a pool with a failed device.
A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`. No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.
zpool create <pool> draid[1,2,3] <vdevs...>
Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons. The supported options include:
zpool create <pool> \
draid[<parity>][:<data>d][:<children>c][:<spares>s] \
<vdevs...>
- draid[parity] - Parity level (default 1)
- draid[:<data>d] - Data devices per group (default 8)
- draid[:<children>c] - Expected number of child vdevs
- draid[:<spares>s] - Distributed hot spares (default 0)
Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.
```
pool: tank
state: ONLINE
config:
NAME STATE READ WRITE CKSUM
slag7 ONLINE 0 0 0
draid2:8d:68c:2s-0 ONLINE 0 0 0
L0 ONLINE 0 0 0
L1 ONLINE 0 0 0
...
U25 ONLINE 0 0 0
U26 ONLINE 0 0 0
spare-53 ONLINE 0 0 0
U27 ONLINE 0 0 0
draid2-0-0 ONLINE 0 0 0
U28 ONLINE 0 0 0
U29 ONLINE 0 0 0
...
U42 ONLINE 0 0 0
U43 ONLINE 0 0 0
special
mirror-1 ONLINE 0 0 0
L5 ONLINE 0 0 0
U5 ONLINE 0 0 0
mirror-2 ONLINE 0 0 0
L6 ONLINE 0 0 0
U6 ONLINE 0 0 0
spares
draid2-0-0 INUSE currently in use
draid2-0-1 AVAIL
```
When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command. These options are leveraged
by zloop.sh to test a wide range of dRAID configurations.
-K draid|raidz|random - kind of RAID to test
-D <value> - dRAID data drives per group
-S <value> - dRAID distributed hot spares
-R <value> - RAID parity (raidz or dRAID)
The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated to provide test coverage for the
dRAID feature.
Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00
|
|
|
}
|
|
|
|
|
2018-09-05 19:33:36 -06:00
|
|
|
if (is_special) {
|
|
|
|
verify(nvlist_add_string(nv,
|
|
|
|
ZPOOL_CONFIG_ALLOCATION_BIAS,
|
|
|
|
VDEV_ALLOC_BIAS_SPECIAL) == 0);
|
|
|
|
}
|
|
|
|
if (is_dedup) {
|
|
|
|
verify(nvlist_add_string(nv,
|
|
|
|
ZPOOL_CONFIG_ALLOCATION_BIAS,
|
|
|
|
VDEV_ALLOC_BIAS_DEDUP) == 0);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
argc--;
|
|
|
|
argv++;
|
|
|
|
}
|
|
|
|
|
|
|
|
toplevels++;
|
|
|
|
top = realloc(top, toplevels * sizeof (nvlist_t *));
|
|
|
|
if (top == NULL)
|
|
|
|
zpool_no_memory();
|
|
|
|
top[toplevels - 1] = nv;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (toplevels == 0 && nspares == 0 && nl2cache == 0) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid vdev "
|
|
|
|
"specification: at least one toplevel vdev must be "
|
|
|
|
"specified\n"));
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (seen_logs && nlogs == 0) {
|
|
|
|
(void) fprintf(stderr, gettext("invalid vdev specification: "
|
|
|
|
"log requires at least 1 device\n"));
|
2016-09-30 03:11:44 +08:00
|
|
|
goto spec_out;
|
2008-11-20 12:01:55 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Finally, create nvroot and add all top-level vdevs to it.
|
|
|
|
*/
|
|
|
|
verify(nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) == 0);
|
|
|
|
verify(nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE,
|
|
|
|
VDEV_TYPE_ROOT) == 0);
|
|
|
|
verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
top, toplevels) == 0);
|
|
|
|
if (nspares != 0)
|
|
|
|
verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES,
|
|
|
|
spares, nspares) == 0);
|
|
|
|
if (nl2cache != 0)
|
|
|
|
verify(nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_L2CACHE,
|
|
|
|
l2cache, nl2cache) == 0);
|
|
|
|
|
2016-09-30 03:11:44 +08:00
|
|
|
spec_out:
|
2008-11-20 12:01:55 -08:00
|
|
|
for (t = 0; t < toplevels; t++)
|
|
|
|
nvlist_free(top[t]);
|
|
|
|
for (t = 0; t < nspares; t++)
|
|
|
|
nvlist_free(spares[t]);
|
|
|
|
for (t = 0; t < nl2cache; t++)
|
|
|
|
nvlist_free(l2cache[t]);
|
2016-09-30 03:11:44 +08:00
|
|
|
|
|
|
|
free(spares);
|
|
|
|
free(l2cache);
|
2008-11-20 12:01:55 -08:00
|
|
|
free(top);
|
|
|
|
|
|
|
|
return (nvroot);
|
|
|
|
}
|
|
|
|
|
2010-05-28 13:45:14 -07:00
|
|
|
nvlist_t *
|
|
|
|
split_mirror_vdev(zpool_handle_t *zhp, char *newname, nvlist_t *props,
|
|
|
|
splitflags_t flags, int argc, char **argv)
|
|
|
|
{
|
|
|
|
nvlist_t *newroot = NULL, **child;
|
|
|
|
uint_t c, children;
|
|
|
|
|
|
|
|
if (argc > 0) {
|
2011-06-16 21:56:38 +02:00
|
|
|
if ((newroot = construct_spec(props, argc, argv)) == NULL) {
|
2010-05-28 13:45:14 -07:00
|
|
|
(void) fprintf(stderr, gettext("Unable to build a "
|
|
|
|
"pool from the specified devices\n"));
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!flags.dryrun && make_disks(zhp, newroot) != 0) {
|
|
|
|
nvlist_free(newroot);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* avoid any tricks in the spec */
|
|
|
|
verify(nvlist_lookup_nvlist_array(newroot,
|
|
|
|
ZPOOL_CONFIG_CHILDREN, &child, &children) == 0);
|
|
|
|
for (c = 0; c < children; c++) {
|
|
|
|
char *path;
|
|
|
|
const char *type;
|
|
|
|
int min, max;
|
|
|
|
|
|
|
|
verify(nvlist_lookup_string(child[c],
|
|
|
|
ZPOOL_CONFIG_PATH, &path) == 0);
|
|
|
|
if ((type = is_grouping(path, &min, &max)) != NULL) {
|
|
|
|
(void) fprintf(stderr, gettext("Cannot use "
|
|
|
|
"'%s' as a device for splitting\n"), type);
|
|
|
|
nvlist_free(newroot);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (zpool_vdev_split(zhp, newname, &newroot, props, flags) != 0) {
|
2016-03-31 23:54:07 -04:00
|
|
|
nvlist_free(newroot);
|
2010-05-28 13:45:14 -07:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (newroot);
|
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
|
2018-09-05 19:33:36 -06:00
|
|
|
static int
|
|
|
|
num_normal_vdevs(nvlist_t *nvroot)
|
|
|
|
{
|
|
|
|
nvlist_t **top;
|
|
|
|
uint_t t, toplevels, normal = 0;
|
|
|
|
|
|
|
|
verify(nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
|
|
|
&top, &toplevels) == 0);
|
|
|
|
|
|
|
|
for (t = 0; t < toplevels; t++) {
|
|
|
|
uint64_t log = B_FALSE;
|
|
|
|
|
|
|
|
(void) nvlist_lookup_uint64(top[t], ZPOOL_CONFIG_IS_LOG, &log);
|
|
|
|
if (log)
|
|
|
|
continue;
|
|
|
|
if (nvlist_exists(top[t], ZPOOL_CONFIG_ALLOCATION_BIAS))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
normal++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (normal);
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* Get and validate the contents of the given vdev specification. This ensures
|
|
|
|
* that the nvlist returned is well-formed, that all the devices exist, and that
|
|
|
|
* they are not currently in use by any other known consumer. The 'poolconfig'
|
|
|
|
* parameter is the current configuration of the pool when adding devices
|
|
|
|
* existing pool, and is used to perform additional checks, such as changing the
|
|
|
|
* replication level of the pool. It can be 'NULL' to indicate that this is a
|
|
|
|
* new pool. The 'force' flag controls whether devices should be forcefully
|
|
|
|
* added, even if they appear in use.
|
|
|
|
*/
|
|
|
|
nvlist_t *
|
2011-06-16 21:56:38 +02:00
|
|
|
make_root_vdev(zpool_handle_t *zhp, nvlist_t *props, int force, int check_rep,
|
2010-05-28 13:45:14 -07:00
|
|
|
boolean_t replacing, boolean_t dryrun, int argc, char **argv)
|
2008-11-20 12:01:55 -08:00
|
|
|
{
|
|
|
|
nvlist_t *newroot;
|
|
|
|
nvlist_t *poolconfig = NULL;
|
|
|
|
is_force = force;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Construct the vdev specification. If this is successful, we know
|
|
|
|
* that we have a valid specification, and that all devices can be
|
|
|
|
* opened.
|
|
|
|
*/
|
2011-06-16 21:56:38 +02:00
|
|
|
if ((newroot = construct_spec(props, argc, argv)) == NULL)
|
2008-11-20 12:01:55 -08:00
|
|
|
return (NULL);
|
|
|
|
|
2015-04-25 22:08:29 -06:00
|
|
|
if (zhp && ((poolconfig = zpool_get_config(zhp, NULL)) == NULL)) {
|
|
|
|
nvlist_free(newroot);
|
2008-11-20 12:01:55 -08:00
|
|
|
return (NULL);
|
2015-04-25 22:08:29 -06:00
|
|
|
}
|
2008-11-20 12:01:55 -08:00
|
|
|
|
|
|
|
/*
|
2019-08-30 18:43:30 +02:00
|
|
|
* Validate each device to make sure that it's not shared with another
|
2008-11-20 12:01:55 -08:00
|
|
|
* subsystem. We do this even if 'force' is set, because there are some
|
|
|
|
* uses (such as a dedicated dump device) that even '-f' cannot
|
|
|
|
* override.
|
|
|
|
*/
|
2015-07-05 21:17:49 +02:00
|
|
|
if (is_device_in_use(poolconfig, newroot, force, replacing, B_FALSE)) {
|
2008-11-20 12:01:55 -08:00
|
|
|
nvlist_free(newroot);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Check the replication level of the given vdevs and report any errors
|
|
|
|
* found. We include the existing pool spec, if any, as we need to
|
|
|
|
* catch changes against the existing replication level.
|
|
|
|
*/
|
|
|
|
if (check_rep && check_replication(poolconfig, newroot) != 0) {
|
|
|
|
nvlist_free(newroot);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2018-09-05 19:33:36 -06:00
|
|
|
/*
|
|
|
|
* On pool create the new vdev spec must have one normal vdev.
|
|
|
|
*/
|
|
|
|
if (poolconfig == NULL && num_normal_vdevs(newroot) == 0) {
|
|
|
|
vdev_error(gettext("at least one general top-level vdev must "
|
|
|
|
"be specified\n"));
|
|
|
|
nvlist_free(newroot);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
2008-11-20 12:01:55 -08:00
|
|
|
/*
|
|
|
|
* Run through the vdev specification and label any whole disks found.
|
|
|
|
*/
|
2008-12-03 12:09:06 -08:00
|
|
|
if (!dryrun && make_disks(zhp, newroot) != 0) {
|
2008-11-20 12:01:55 -08:00
|
|
|
nvlist_free(newroot);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
return (newroot);
|
|
|
|
}
|