freebsd-nq/module/zcommon/zfs_namecheck.c
Brian Behlendorf b2255edcc0
Distributed Spare (dRAID) Feature
This patch adds a new top-level vdev type called dRAID, which stands
for Distributed parity RAID.  This pool configuration allows all dRAID
vdevs to participate when rebuilding to a distributed hot spare device.
This can substantially reduce the total time required to restore full
parity to pool with a failed device.

A dRAID pool can be created using the new top-level `draid` type.
Like `raidz`, the desired redundancy is specified after the type:
`draid[1,2,3]`.  No additional information is required to create the
pool and reasonable default values will be chosen based on the number
of child vdevs in the dRAID vdev.

    zpool create <pool> draid[1,2,3] <vdevs...>

Unlike raidz, additional optional dRAID configuration values can be
provided as part of the draid type as colon separated values. This
allows administrators to fully specify a layout for either performance
or capacity reasons.  The supported options include:

    zpool create <pool> \
        draid[<parity>][:<data>d][:<children>c][:<spares>s] \
        <vdevs...>

    - draid[parity]       - Parity level (default 1)
    - draid[:<data>d]     - Data devices per group (default 8)
    - draid[:<children>c] - Expected number of child vdevs
    - draid[:<spares>s]   - Distributed hot spares (default 0)

Abbreviated example `zpool status` output for a 68 disk dRAID pool
with two distributed spares using special allocation classes.

```
  pool: tank
 state: ONLINE
config:

    NAME                  STATE     READ WRITE CKSUM
    slag7                 ONLINE       0     0     0
      draid2:8d:68c:2s-0  ONLINE       0     0     0
        L0                ONLINE       0     0     0
        L1                ONLINE       0     0     0
        ...
        U25               ONLINE       0     0     0
        U26               ONLINE       0     0     0
        spare-53          ONLINE       0     0     0
          U27             ONLINE       0     0     0
          draid2-0-0      ONLINE       0     0     0
        U28               ONLINE       0     0     0
        U29               ONLINE       0     0     0
        ...
        U42               ONLINE       0     0     0
        U43               ONLINE       0     0     0
    special
      mirror-1            ONLINE       0     0     0
        L5                ONLINE       0     0     0
        U5                ONLINE       0     0     0
      mirror-2            ONLINE       0     0     0
        L6                ONLINE       0     0     0
        U6                ONLINE       0     0     0
    spares
      draid2-0-0          INUSE     currently in use
      draid2-0-1          AVAIL
```

When adding test coverage for the new dRAID vdev type the following
options were added to the ztest command.  These options are leverages
by zloop.sh to test a wide range of dRAID configurations.

    -K draid|raidz|random - kind of RAID to test
    -D <value>            - dRAID data drives per group
    -S <value>            - dRAID distributed hot spares
    -R <value>            - RAID parity (raidz or dRAID)

The zpool_create, zpool_import, redundancy, replacement and fault
test groups have all been updated provide test coverage for the
dRAID feature.

Co-authored-by: Isaac Huang <he.huang@intel.com>
Co-authored-by: Mark Maybee <mmaybee@cray.com>
Co-authored-by: Don Brady <don.brady@delphix.com>
Co-authored-by: Matthew Ahrens <mahrens@delphix.com>
Co-authored-by: Brian Behlendorf <behlendorf1@llnl.gov>
Reviewed-by: Mark Maybee <mmaybee@cray.com>
Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Tony Hutter <hutter2@llnl.gov>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #10102
2020-11-13 13:51:51 -08:00

474 lines
10 KiB
C

/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
*/
/*
* Common name validation routines for ZFS. These routines are shared by the
* userland code as well as the ioctl() layer to ensure that we don't
* inadvertently expose a hole through direct ioctl()s that never gets tested.
* In userland, however, we want significantly more information about _why_ the
* name is invalid. In the kernel, we only care whether it's valid or not.
* Each routine therefore takes a 'namecheck_err_t' which describes exactly why
* the name failed to validate.
*/
#if !defined(_KERNEL)
#include <string.h>
#endif
#include <sys/dsl_dir.h>
#include <sys/param.h>
#include <sys/nvpair.h>
#include "zfs_namecheck.h"
#include "zfs_deleg.h"
/*
* Deeply nested datasets can overflow the stack, so we put a limit
* in the amount of nesting a path can have. zfs_max_dataset_nesting
* can be tuned temporarily to fix existing datasets that exceed our
* predefined limit.
*/
int zfs_max_dataset_nesting = 50;
static int
valid_char(char c)
{
return ((c >= 'a' && c <= 'z') ||
(c >= 'A' && c <= 'Z') ||
(c >= '0' && c <= '9') ||
c == '-' || c == '_' || c == '.' || c == ':' || c == ' ');
}
/*
* Looks at a path and returns its level of nesting (depth).
*/
int
get_dataset_depth(const char *path)
{
const char *loc = path;
int nesting = 0;
/*
* Keep track of nesting until you hit the end of the
* path or found the snapshot/bookmark separator.
*/
for (int i = 0; loc[i] != '\0' &&
loc[i] != '@' &&
loc[i] != '#'; i++) {
if (loc[i] == '/')
nesting++;
}
return (nesting);
}
/*
* Snapshot names must be made up of alphanumeric characters plus the following
* characters:
*
* [-_.: ]
*
* Returns 0 on success, -1 on error.
*/
int
zfs_component_namecheck(const char *path, namecheck_err_t *why, char *what)
{
const char *loc;
if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
}
if (path[0] == '\0') {
if (why)
*why = NAME_ERR_EMPTY_COMPONENT;
return (-1);
}
for (loc = path; *loc; loc++) {
if (!valid_char(*loc)) {
if (why) {
*why = NAME_ERR_INVALCHAR;
*what = *loc;
}
return (-1);
}
}
return (0);
}
/*
* Permissions set name must start with the letter '@' followed by the
* same character restrictions as snapshot names, except that the name
* cannot exceed 64 characters.
*
* Returns 0 on success, -1 on error.
*/
int
permset_namecheck(const char *path, namecheck_err_t *why, char *what)
{
if (strlen(path) >= ZFS_PERMSET_MAXLEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
}
if (path[0] != '@') {
if (why) {
*why = NAME_ERR_NO_AT;
*what = path[0];
}
return (-1);
}
return (zfs_component_namecheck(&path[1], why, what));
}
/*
* Dataset paths should not be deeper than zfs_max_dataset_nesting
* in terms of nesting.
*
* Returns 0 on success, -1 on error.
*/
int
dataset_nestcheck(const char *path)
{
return ((get_dataset_depth(path) < zfs_max_dataset_nesting) ? 0 : -1);
}
/*
* Entity names must be of the following form:
*
* [component/]*[component][(@|#)component]?
*
* Where each component is made up of alphanumeric characters plus the following
* characters:
*
* [-_.: %]
*
* We allow '%' here as we use that character internally to create unique
* names for temporary clones (for online recv).
*
* Returns 0 on success, -1 on error.
*/
int
entity_namecheck(const char *path, namecheck_err_t *why, char *what)
{
const char *end;
EQUIV(why == NULL, what == NULL);
/*
* Make sure the name is not too long.
*/
if (strlen(path) >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
}
/* Explicitly check for a leading slash. */
if (path[0] == '/') {
if (why)
*why = NAME_ERR_LEADING_SLASH;
return (-1);
}
if (path[0] == '\0') {
if (why)
*why = NAME_ERR_EMPTY_COMPONENT;
return (-1);
}
const char *start = path;
boolean_t found_delim = B_FALSE;
for (;;) {
/* Find the end of this component */
end = start;
while (*end != '/' && *end != '@' && *end != '#' &&
*end != '\0')
end++;
if (*end == '\0' && end[-1] == '/') {
/* trailing slashes are not allowed */
if (why)
*why = NAME_ERR_TRAILING_SLASH;
return (-1);
}
/* Validate the contents of this component */
for (const char *loc = start; loc != end; loc++) {
if (!valid_char(*loc) && *loc != '%') {
if (why) {
*why = NAME_ERR_INVALCHAR;
*what = *loc;
}
return (-1);
}
}
if (*end == '\0' || *end == '/') {
int component_length = end - start;
/* Validate the contents of this component is not '.' */
if (component_length == 1) {
if (start[0] == '.') {
if (why)
*why = NAME_ERR_SELF_REF;
return (-1);
}
}
/* Validate the content of this component is not '..' */
if (component_length == 2) {
if (start[0] == '.' && start[1] == '.') {
if (why)
*why = NAME_ERR_PARENT_REF;
return (-1);
}
}
}
/* Snapshot or bookmark delimiter found */
if (*end == '@' || *end == '#') {
/* Multiple delimiters are not allowed */
if (found_delim != 0) {
if (why)
*why = NAME_ERR_MULTIPLE_DELIMITERS;
return (-1);
}
found_delim = B_TRUE;
}
/* Zero-length components are not allowed */
if (start == end) {
if (why)
*why = NAME_ERR_EMPTY_COMPONENT;
return (-1);
}
/* If we've reached the end of the string, we're OK */
if (*end == '\0')
return (0);
/*
* If there is a '/' in a snapshot or bookmark name
* then report an error
*/
if (*end == '/' && found_delim != 0) {
if (why)
*why = NAME_ERR_TRAILING_SLASH;
return (-1);
}
/* Update to the next component */
start = end + 1;
}
}
/*
* Dataset is any entity, except bookmark
*/
int
dataset_namecheck(const char *path, namecheck_err_t *why, char *what)
{
int ret = entity_namecheck(path, why, what);
if (ret == 0 && strchr(path, '#') != NULL) {
if (why != NULL) {
*why = NAME_ERR_INVALCHAR;
*what = '#';
}
return (-1);
}
return (ret);
}
/*
* Assert path is a valid bookmark name
*/
int
bookmark_namecheck(const char *path, namecheck_err_t *why, char *what)
{
int ret = entity_namecheck(path, why, what);
if (ret == 0 && strchr(path, '#') == NULL) {
if (why != NULL) {
*why = NAME_ERR_NO_POUND;
*what = '#';
}
return (-1);
}
return (ret);
}
/*
* Assert path is a valid snapshot name
*/
int
snapshot_namecheck(const char *path, namecheck_err_t *why, char *what)
{
int ret = entity_namecheck(path, why, what);
if (ret == 0 && strchr(path, '@') == NULL) {
if (why != NULL) {
*why = NAME_ERR_NO_AT;
*what = '@';
}
return (-1);
}
return (ret);
}
/*
* mountpoint names must be of the following form:
*
* /[component][/]*[component][/]
*
* Returns 0 on success, -1 on error.
*/
int
mountpoint_namecheck(const char *path, namecheck_err_t *why)
{
const char *start, *end;
/*
* Make sure none of the mountpoint component names are too long.
* If a component name is too long then the mkdir of the mountpoint
* will fail but then the mountpoint property will be set to a value
* that can never be mounted. Better to fail before setting the prop.
* Extra slashes are OK, they will be tossed by the mountpoint mkdir.
*/
if (path == NULL || *path != '/') {
if (why)
*why = NAME_ERR_LEADING_SLASH;
return (-1);
}
/* Skip leading slash */
start = &path[1];
do {
end = start;
while (*end != '/' && *end != '\0')
end++;
if (end - start >= ZFS_MAX_DATASET_NAME_LEN) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
}
start = end + 1;
} while (*end != '\0');
return (0);
}
/*
* For pool names, we have the same set of valid characters as described in
* dataset names, with the additional restriction that the pool name must begin
* with a letter. The pool names 'raidz' and 'mirror' are also reserved names
* that cannot be used.
*
* Returns 0 on success, -1 on error.
*/
int
pool_namecheck(const char *pool, namecheck_err_t *why, char *what)
{
const char *c;
/*
* Make sure the name is not too long.
* If we're creating a pool with version >= SPA_VERSION_DSL_SCRUB (v11)
* we need to account for additional space needed by the origin ds which
* will also be snapshotted: "poolname"+"/"+"$ORIGIN"+"@"+"$ORIGIN".
* Play it safe and enforce this limit even if the pool version is < 11
* so it can be upgraded without issues.
*/
if (strlen(pool) >= (ZFS_MAX_DATASET_NAME_LEN - 2 -
strlen(ORIGIN_DIR_NAME) * 2)) {
if (why)
*why = NAME_ERR_TOOLONG;
return (-1);
}
c = pool;
while (*c != '\0') {
if (!valid_char(*c)) {
if (why) {
*why = NAME_ERR_INVALCHAR;
*what = *c;
}
return (-1);
}
c++;
}
if (!(*pool >= 'a' && *pool <= 'z') &&
!(*pool >= 'A' && *pool <= 'Z')) {
if (why)
*why = NAME_ERR_NOLETTER;
return (-1);
}
if (strcmp(pool, "mirror") == 0 ||
strcmp(pool, "raidz") == 0 ||
strcmp(pool, "draid") == 0) {
if (why)
*why = NAME_ERR_RESERVED;
return (-1);
}
if (pool[0] == 'c' && (pool[1] >= '0' && pool[1] <= '9')) {
if (why)
*why = NAME_ERR_DISKLIKE;
return (-1);
}
return (0);
}
EXPORT_SYMBOL(entity_namecheck);
EXPORT_SYMBOL(pool_namecheck);
EXPORT_SYMBOL(dataset_namecheck);
EXPORT_SYMBOL(bookmark_namecheck);
EXPORT_SYMBOL(snapshot_namecheck);
EXPORT_SYMBOL(zfs_component_namecheck);
EXPORT_SYMBOL(dataset_nestcheck);
EXPORT_SYMBOL(get_dataset_depth);
EXPORT_SYMBOL(zfs_max_dataset_nesting);
ZFS_MODULE_PARAM(zfs, zfs_, max_dataset_nesting, INT, ZMOD_RW,
"Limit to the amount of nesting a path can have. Defaults to 50.");