Introduce "feature flags" for ZFS pools (bump SPA version to 5000).

Add first feature "com.delphix:async_destroy" (asynchronous destroy
of ZFS datasets).
Implement features support in ZFS boot code.

Illumos revisions merged:
13700:2889e2596bd6
13701:1949b688d5fb
2619 asynchronous destruction of ZFS file systems
2747 SPA versioning with zfs feature flags

References:
https://www.illumos.org/issues/2619
https://www.illumos.org/issues/2747

Obtained from:	illumos (issue #2619, #2747)
MFC after:	1 month
This commit is contained in:
Martin Matuska 2012-06-11 11:35:22 +00:00
parent 7561cb5c8b
commit 2d9cf57e18
69 changed files with 4277 additions and 476 deletions

View File

@ -24,6 +24,17 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW:
disable the most expensive debugging functionality run
"ln -s 'abort:false,junk:false' /etc/malloc.conf".)
20120611:
A new version of ZFS (pool version 5000) has been merged to -HEAD.
Starting with this version the old system of ZFS pool versioning
is superseded by "feature flags". This concept enables forward
compatibility against certain future changes in functionality of ZFS
pools. The first read-only compatible "feature flag" for ZFS pools
is named "com.delphix:async_destroy". For more information
read the new zpool-features(5) manual page.
Please refer to the "ZFS notes" section of this file for information
on upgrading boot ZFS pools.
20120417:
The malloc(3) implementation embedded in libc now uses sources imported
as contrib/jemalloc. The most disruptive API change is to

View File

@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <stdio.h>
@ -54,6 +56,7 @@
#include <sys/zfs_fuid.h>
#include <sys/arc.h>
#include <sys/ddt.h>
#include <sys/zfeature.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
@ -63,7 +66,8 @@
#define ZDB_CHECKSUM_NAME(idx) ((idx) < ZIO_CHECKSUM_FUNCTIONS ? \
zio_checksum_table[(idx)].ci_name : "UNKNOWN")
#define ZDB_OT_NAME(idx) ((idx) < DMU_OT_NUMTYPES ? \
dmu_ot[(idx)].ot_name : "UNKNOWN")
dmu_ot[(idx)].ot_name : DMU_OT_IS_VALID(idx) ? \
dmu_ot_byteswap[DMU_OT_BYTESWAP(idx)].ob_name : "UNKNOWN")
#define ZDB_OT_TYPE(idx) ((idx) < DMU_OT_NUMTYPES ? (idx) : DMU_OT_NUMTYPES)
#ifndef lint
@ -1088,7 +1092,7 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
ASSERT(size == sizeof (*ds));
crtime = ds->ds_creation_time;
zdb_nicenum(ds->ds_used_bytes, used);
zdb_nicenum(ds->ds_referenced_bytes, used);
zdb_nicenum(ds->ds_compressed_bytes, compressed);
zdb_nicenum(ds->ds_uncompressed_bytes, uncompressed);
zdb_nicenum(ds->ds_unique_bytes, unique);
@ -1130,6 +1134,44 @@ dump_dsl_dataset(objset_t *os, uint64_t object, void *data, size_t size)
(void) printf("\t\tbp = %s\n", blkbuf);
}
/*
 * bptree_iterate() callback used by dump_bptree(): print one block
 * pointer per line, skipping entries whose birth txg is zero.
 * Always returns 0.
 */
/* ARGSUSED */
static int
dump_bptree_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
	char text[BP_SPRINTF_LEN];

	if (bp->blk_birth == 0)
		return (0);

	sprintf_blkptr(text, bp);
	(void) printf("\t%s\n", text);
	return (0);
}
/*
 * Print a summary of the bptree object 'obj' in objset 'os', labelled
 * with 'name'.  Nothing is printed unless -d was given at least three
 * times; at five or more the individual block pointers are listed too.
 */
static void
dump_bptree(objset_t *os, uint64_t obj, char *name)
{
	dmu_buf_t *bonus;
	bptree_phys_t *phys;
	char nicebytes[32];

	if (dump_opt['d'] >= 3) {
		/* The summary counters are read from the object's bonus buffer. */
		VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &bonus));
		phys = bonus->db_data;
		zdb_nicenum(phys->bt_bytes, nicebytes);
		(void) printf("\n %s: %llu datasets, %s\n", name,
		    (unsigned long long)(phys->bt_end - phys->bt_begin),
		    nicebytes);
		dmu_buf_rele(bonus, FTAG);

		if (dump_opt['d'] >= 5) {
			(void) printf("\n");
			(void) bptree_iterate(os, obj, B_FALSE,
			    dump_bptree_cb, NULL, NULL);
		}
	}
}
/* ARGSUSED */
static int
dump_bpobj_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
@ -1883,11 +1925,13 @@ typedef struct zdb_blkstats {
*/
#define ZDB_OT_DEFERRED (DMU_OT_NUMTYPES + 0)
#define ZDB_OT_DITTO (DMU_OT_NUMTYPES + 1)
#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 2)
#define ZDB_OT_OTHER (DMU_OT_NUMTYPES + 2)
#define ZDB_OT_TOTAL (DMU_OT_NUMTYPES + 3)
static char *zdb_ot_extname[] = {
"deferred free",
"dedup ditto",
"other",
"Total",
};
@ -1968,9 +2012,10 @@ zdb_blkptr_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
type = BP_GET_TYPE(bp);
zdb_count_block(zcb, zilog, bp, type);
zdb_count_block(zcb, zilog, bp,
(type & DMU_OT_NEWTYPE) ? ZDB_OT_OTHER : type);
is_metadata = (BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata);
is_metadata = (BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type));
if (dump_opt['c'] > 1 || (dump_opt['c'] && is_metadata)) {
int ioerr;
@ -2197,6 +2242,12 @@ dump_block_stats(spa_t *spa)
(void) bpobj_iterate_nofree(&spa->spa_dsl_pool->dp_free_bpobj,
count_block_cb, &zcb, NULL);
}
if (spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
VERIFY3U(0, ==, bptree_iterate(spa->spa_meta_objset,
spa->spa_dsl_pool->dp_bptree_obj, B_FALSE, count_block_cb,
&zcb, NULL));
}
if (dump_opt['c'] > 1)
flags |= TRAVERSE_PREFETCH_DATA;
@ -2373,7 +2424,7 @@ zdb_ddt_add_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
}
if (BP_IS_HOLE(bp) || BP_GET_CHECKSUM(bp) == ZIO_CHECKSUM_OFF ||
BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata)
BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))
return (0);
ddt_key_fill(&zdde_search.zdde_key, bp);
@ -2478,7 +2529,14 @@ dump_zpool(spa_t *spa)
dump_bpobj(&spa->spa_deferred_bpobj, "Deferred frees");
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
dump_bpobj(&spa->spa_dsl_pool->dp_free_bpobj,
"Pool frees");
"Pool snapshot frees");
}
if (spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
dump_bptree(spa->spa_meta_objset,
spa->spa_dsl_pool->dp_bptree_obj,
"Pool dataset frees");
}
dump_dtl(spa->spa_root_vdev, 0);
}

View File

@ -0,0 +1,533 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
* zhack is a debugging tool that can write changes to a ZFS pool using
* libzpool for testing purposes. Altering pools with zhack is unsupported
* and may result in corrupted pools.
*/
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <sys/zfs_context.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
#include <sys/dmu.h>
#include <sys/zap.h>
#include <sys/zfs_znode.h>
#include <sys/dsl_synctask.h>
#include <sys/vdev.h>
#include <sys/fs/zfs.h>
#include <sys/dmu_objset.h>
#include <sys/dsl_pool.h>
#include <sys/zio_checksum.h>
#include <sys/zio_compress.h>
#include <sys/zfeature.h>
#undef ZFS_MAXNAMELEN
#undef verify
#include <libzfs.h>
extern boolean_t zfeature_checks_disable;
const char cmdname[] = "zhack";
libzfs_handle_t *g_zfs;
static importargs_t g_importargs;
static char *g_pool;
static boolean_t g_readonly;
/*
 * Print the zhack command synopsis to stderr and exit with status 1.
 * Never returns to the caller.
 *
 * The "feature enable" entry lists -r because zhack_do_feature_enable()
 * accepts that option to mark the injected feature read-only compatible.
 */
static void
usage(void)
{
	(void) fprintf(stderr,
	    "Usage: %s [-c cachefile] [-d dir] <subcommand> <args> ...\n"
	    "where <subcommand> <args> is one of the following:\n"
	    "\n", cmdname);

	(void) fprintf(stderr,
	    " feature stat <pool>\n"
	    " print information about enabled features\n"
	    " feature enable [-r] [-d desc] <pool> <feature>\n"
	    " add a new enabled feature to the pool\n"
	    " -d <desc> sets the feature's description\n"
	    " -r marks the feature as read-only compatible\n"
	    " feature ref [-md] <pool> <feature>\n"
	    " change the refcount on the given feature\n"
	    " -d decrease instead of increase the refcount\n"
	    " -m add the feature to the label if increasing refcount\n"
	    "\n"
	    " <feature> : should be a feature guid\n");
	exit(1);
}
/*
 * Report an unrecoverable error and terminate.
 *
 * Writes "<cmdname>: ", then the printf-style message built from 'fmt'
 * and the variadic arguments, then a newline, all to stderr, and exits
 * with status 1.  Never returns.
 */
static void
fatal(const char *fmt, ...)
{
	va_list args;

	(void) fprintf(stderr, "%s: ", cmdname);

	va_start(args, fmt);
	(void) vfprintf(stderr, fmt, args);
	va_end(args);

	(void) fputc('\n', stderr);
	exit(1);
}
/*
 * Callback registered for DMU_OST_ZFS objsets by import_pool().
 *
 * Bonus types other than DMU_OT_ZNODE and DMU_OT_SA yield ENOENT.
 * For those two types the tool prints a diagnostic and aborts.
 */
/* ARGSUSED */
static int
space_delta_cb(dmu_object_type_t bonustype, void *data,
    uint64_t *userp, uint64_t *groupp)
{
	if (bonustype == DMU_OT_ZNODE || bonustype == DMU_OT_SA) {
		(void) fprintf(stderr,
		    "modifying object that needs user accounting");
		abort();
		/* NOTREACHED */
	}

	/* Not a type of object that is tracked. */
	return (ENOENT);
}
/*
 * Import the pool that contains 'target' into this process.
 *
 * 'target' may name a dataset or snapshot; everything from the first
 * '/' or '@' onwards is dropped to obtain the pool name, which is kept
 * in g_pool.  'readonly' selects the kernel_init() open mode, is
 * recorded in g_readonly, and causes the "readonly" pool property to be
 * set for the import.  Any failure ends the program through fatal().
 */
static void
import_pool(const char *target, boolean_t readonly)
{
	nvlist_t *found;
	nvlist_t *cfg;
	nvlist_t *import_props = NULL;
	nvpair_t *first;
	const char *poolname;
	spa_t *spa;
	char *sep;
	int err;

	kernel_init(readonly ? FREAD : (FREAD | FWRITE));
	g_zfs = libzfs_init();
	ASSERT(g_zfs != NULL);

	dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);

	g_readonly = readonly;

	/*
	 * For read-only access a potentially-active pool (ie, one that is
	 * imported into the kernel) found via the default cachefile is
	 * acceptable, so a successful open means there is nothing to do.
	 */
	if (readonly && spa_open(target, &spa, FTAG) == 0) {
		spa_close(spa, FTAG);
		return;
	}

	g_importargs.unique = B_TRUE;
	g_importargs.can_be_active = readonly;

	/* Reduce the dataset/snapshot name to its pool component. */
	g_pool = strdup(target);
	sep = strpbrk(g_pool, "/@");
	if (sep != NULL)
		*sep = '\0';
	g_importargs.poolname = g_pool;

	found = zpool_search_import(g_zfs, &g_importargs);
	if (found == NULL || nvlist_next_nvpair(found, NULL) == NULL) {
		/*
		 * Nothing importable was found.  Search again allowing
		 * active pools so an in-use pool can be told apart from a
		 * missing one.
		 */
		if (!g_importargs.can_be_active) {
			g_importargs.can_be_active = B_TRUE;
			if (zpool_search_import(g_zfs, &g_importargs) != NULL ||
			    spa_open(target, &spa, FTAG) == 0) {
				fatal("cannot import '%s': pool is active; run "
				    "\"zpool export %s\" first\n",
				    g_pool, g_pool);
			}
		}

		fatal("cannot import '%s': no such pool available\n", g_pool);
	}

	/* Use the first (expected to be the only) search result. */
	first = nvlist_next_nvpair(found, NULL);
	poolname = nvpair_name(first);
	verify(nvpair_value_nvlist(first, &cfg) == 0);

	if (readonly) {
		verify(nvlist_alloc(&import_props, NV_UNIQUE_NAME, 0) == 0);
		verify(nvlist_add_uint64(import_props,
		    zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
	}

	/* Feature checks are switched off only for the import itself. */
	zfeature_checks_disable = B_TRUE;
	err = spa_import(poolname, cfg, import_props, ZFS_IMPORT_NORMAL);
	zfeature_checks_disable = B_FALSE;

	/* EEXIST is treated the same as a successful import. */
	if (err != 0 && err != EEXIST)
		fatal("can't import '%s': %s", poolname, strerror(err));
}
/*
 * Import the pool containing 'target' and open it, storing the handle
 * in '*spa' under hold tag 'tag'.  Feature checks are disabled for the
 * duration of the open.  The program exits through fatal() if the open
 * fails or the pool version is older than SPA_VERSION_FEATURES.
 */
static void
zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa)
{
	int error;

	import_pool(target, readonly);

	zfeature_checks_disable = B_TRUE;
	error = spa_open(target, spa, tag);
	zfeature_checks_disable = B_FALSE;

	if (error != 0)
		fatal("cannot open '%s': %s", target, strerror(error));

	if (spa_version(*spa) >= SPA_VERSION_FEATURES)
		return;

	fatal("'%s' has version %d, features not enabled", target,
	    (int)spa_version(*spa));
}
/*
 * Print every entry of ZAP object 'obj' in objset 'os' under the
 * heading "<name>_obj:".  Entries with 8-byte integers are printed as
 * a single unsigned number; all other entries are looked up again and
 * printed as strings.
 */
static void
dump_obj(objset_t *os, uint64_t obj, const char *name)
{
	zap_cursor_t cursor;
	zap_attribute_t attr;

	(void) printf("%s_obj:\n", name);

	zap_cursor_init(&cursor, os, obj);
	while (zap_cursor_retrieve(&cursor, &attr) == 0) {
		if (attr.za_integer_length != 8) {
			char val[1024];

			ASSERT(attr.za_integer_length == 1);
			VERIFY(zap_lookup(os, obj, attr.za_name,
			    1, sizeof (val), val) == 0);
			(void) printf("\t%s = %s\n", attr.za_name, val);
		} else {
			ASSERT(attr.za_num_integers == 1);
			(void) printf("\t%s = %llu\n", attr.za_name,
			    (u_longlong_t)attr.za_first_integer);
		}
		zap_cursor_advance(&cursor);
	}
	zap_cursor_fini(&cursor);
}
/*
 * Print the name of every pair in the pool's spa_label_features nvlist
 * under the heading "label config:".
 */
static void
dump_mos(spa_t *spa)
{
	nvlist_t *label_features = spa->spa_label_features;
	nvpair_t *cur;

	(void) printf("label config:\n");

	cur = nvlist_next_nvpair(label_features, NULL);
	while (cur != NULL) {
		(void) printf("\t%s\n", nvpair_name(cur));
		cur = nvlist_next_nvpair(label_features, cur);
	}
}
/*
 * "zhack feature stat <pool>": open the pool read-only and print its
 * for_read, for_write and descriptions feature objects followed by the
 * label feature list.  argv[0] is the subcommand name and argv[1] the
 * pool.
 */
static void
zhack_do_feature_stat(int argc, char **argv)
{
	spa_t *spa;
	objset_t *mos;

	/* Only the subcommand name itself was supplied. */
	if (argc < 2) {
		(void) fprintf(stderr, "error: missing pool name\n");
		usage();
	}

	zhack_spa_open(argv[1], B_TRUE, FTAG, &spa);
	mos = spa->spa_meta_objset;

	dump_obj(mos, spa->spa_feat_for_read_obj, "for_read");
	dump_obj(mos, spa->spa_feat_for_write_obj, "for_write");
	dump_obj(mos, spa->spa_feat_desc_obj, "descriptions");
	dump_mos(spa);

	spa_close(spa, FTAG);
}
/*
 * Sync-task callback handed to dsl_sync_task_do(): enables the feature
 * passed as 'arg2' on the pool passed as 'arg1' within transaction 'tx'.
 */
static void
feature_enable_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_feature_enable((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
}
/*
 * "zhack feature enable [-r] [-d desc] <pool> <feature>": inject a new,
 * made-up feature into the pool in the enabled state.
 *
 *   -r        mark the feature as read-only compatible
 *   -d desc   description stored with the feature (default
 *             "zhack injected")
 *
 * argv[0] is the subcommand name.  The guid must be syntactically valid,
 * must not belong to a real feature known to zfeature_lookup_guid(), and
 * must not already be present in the pool's feature description object.
 * Every failure terminates the program through fatal() or usage().
 */
static void
zhack_do_feature_enable(int argc, char **argv)
{
	/*
	 * getopt() returns an int.  Storing it in a plain char breaks the
	 * comparison against -1 wherever char is unsigned, turning the
	 * option loop into an infinite loop.
	 */
	int c;
	char *desc, *target;
	spa_t *spa;
	objset_t *mos;
	zfeature_info_t feature;
	zfeature_info_t *nodeps[] = { NULL };

	/*
	 * Features are not added to the pool's label until their refcounts
	 * are incremented, so fi_mos can just be left as false for now.
	 */
	desc = NULL;
	feature.fi_uname = "zhack";
	feature.fi_mos = B_FALSE;
	feature.fi_can_readonly = B_FALSE;
	feature.fi_depends = nodeps;

	optind = 1;
	while ((c = getopt(argc, argv, "rmd:")) != -1) {
		switch (c) {
		case 'r':
			feature.fi_can_readonly = B_TRUE;
			break;
		case 'd':
			/* Drop any description from an earlier -d. */
			free(desc);
			desc = strdup(optarg);
			break;
		default:
			usage();
			break;
		}
	}

	if (desc == NULL)
		desc = strdup("zhack injected");
	if (desc == NULL)
		fatal("out of memory");
	feature.fi_desc = desc;

	argc -= optind;
	argv += optind;

	if (argc < 2) {
		(void) fprintf(stderr, "error: missing feature or pool name\n");
		usage();
	}
	target = argv[0];
	feature.fi_guid = argv[1];

	if (!zfeature_is_valid_guid(feature.fi_guid))
		fatal("invalid feature guid: %s", feature.fi_guid);

	zhack_spa_open(target, B_FALSE, FTAG, &spa);
	mos = spa->spa_meta_objset;

	/* Refuse to tamper with features the software really implements. */
	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL))
		fatal("'%s' is a real feature, will not enable",
		    feature.fi_guid);
	if (0 == zap_contains(mos, spa->spa_feat_desc_obj, feature.fi_guid))
		fatal("feature already enabled: %s", feature.fi_guid);

	VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
	    feature_enable_sync, spa, &feature, 5));

	spa_close(spa, FTAG);

	free(desc);
}
/*
 * Sync-task callback handed to dsl_sync_task_do(): increments the
 * refcount of the feature passed as 'arg2' on the pool passed as 'arg1'
 * within transaction 'tx'.
 */
static void
feature_incr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_feature_incr((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
}
/*
 * Sync-task callback handed to dsl_sync_task_do(): decrements the
 * refcount of the feature passed as 'arg2' on the pool passed as 'arg1'
 * within transaction 'tx'.
 */
static void
feature_decr_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_feature_decr((spa_t *)arg1, (zfeature_info_t *)arg2, tx);
}
/*
 * "zhack feature ref [-md] <pool> <feature>": change the refcount of a
 * feature previously injected with "zhack feature enable".
 *
 *   -d   decrement the refcount instead of incrementing it
 *   -m   set fi_mos on the feature (per the usage text: add the feature
 *        to the label when increasing the refcount)
 *
 * argv[0] is the subcommand name.  The guid must be syntactically valid,
 * must not belong to a real feature, and must already be enabled on the
 * pool.  Every failure terminates the program through fatal() or usage().
 */
static void
zhack_do_feature_ref(int argc, char **argv)
{
	/*
	 * getopt() returns an int.  Storing it in a plain char breaks the
	 * comparison against -1 wherever char is unsigned, turning the
	 * option loop into an infinite loop.
	 */
	int c;
	char *target;
	boolean_t decr = B_FALSE;
	spa_t *spa;
	objset_t *mos;
	zfeature_info_t feature;
	zfeature_info_t *nodeps[] = { NULL };

	/*
	 * fi_desc does not matter here because it was written to disk
	 * when the feature was enabled, but we need to properly set the
	 * feature for read or write based on the information we read off
	 * disk later.
	 */
	feature.fi_uname = "zhack";
	feature.fi_mos = B_FALSE;
	feature.fi_desc = NULL;
	feature.fi_depends = nodeps;

	optind = 1;
	while ((c = getopt(argc, argv, "md")) != -1) {
		switch (c) {
		case 'm':
			feature.fi_mos = B_TRUE;
			break;
		case 'd':
			decr = B_TRUE;
			break;
		default:
			usage();
			break;
		}
	}
	argc -= optind;
	argv += optind;

	if (argc < 2) {
		(void) fprintf(stderr, "error: missing feature or pool name\n");
		usage();
	}
	target = argv[0];
	feature.fi_guid = argv[1];

	if (!zfeature_is_valid_guid(feature.fi_guid))
		fatal("invalid feature guid: %s", feature.fi_guid);

	zhack_spa_open(target, B_FALSE, FTAG, &spa);
	mos = spa->spa_meta_objset;

	/* Refuse to tamper with features the software really implements. */
	if (0 == zfeature_lookup_guid(feature.fi_guid, NULL))
		fatal("'%s' is a real feature, will not change refcount",
		    feature.fi_guid);

	/*
	 * Which on-disk object holds the guid determines whether the
	 * feature is treated as read-only compatible.
	 */
	if (0 == zap_contains(mos, spa->spa_feat_for_read_obj,
	    feature.fi_guid)) {
		feature.fi_can_readonly = B_FALSE;
	} else if (0 == zap_contains(mos, spa->spa_feat_for_write_obj,
	    feature.fi_guid)) {
		feature.fi_can_readonly = B_TRUE;
	} else {
		fatal("feature is not enabled: %s", feature.fi_guid);
	}

	if (decr && !spa_feature_is_active(spa, &feature))
		fatal("feature refcount already 0: %s", feature.fi_guid);

	VERIFY3U(0, ==, dsl_sync_task_do(spa->spa_dsl_pool, NULL,
	    decr ? feature_decr_sync : feature_incr_sync, spa, &feature, 5));

	spa_close(spa, FTAG);
}
static int
zhack_do_feature(int argc, char **argv)
{
char *subcommand;
argc--;
argv++;
if (argc == 0) {
(void) fprintf(stderr,
"error: no feature operation specified\n");
usage();
}
subcommand = argv[0];
if (strcmp(subcommand, "stat") == 0) {
zhack_do_feature_stat(argc, argv);
} else if (strcmp(subcommand, "enable") == 0) {
zhack_do_feature_enable(argc, argv);
} else if (strcmp(subcommand, "ref") == 0) {
zhack_do_feature_ref(argc, argv);
} else {
(void) fprintf(stderr, "error: unknown subcommand: %s\n",
subcommand);
usage();
}
return (0);
}
#define MAX_NUM_PATHS 1024

/*
 * zhack entry point.
 *
 * Global options:
 *   -c cachefile   cachefile recorded in the import arguments
 *   -d dir         directory added to the import search path list
 *                  (may be repeated, at most MAX_NUM_PATHS times)
 *
 * The first non-option argument selects the subcommand; only "feature"
 * is recognised.  If the pool was not opened read-only it is exported
 * before exiting, and a failed export is reported as fatal.
 */
int
main(int argc, char **argv)
{
	extern void zfs_prop_init(void);

	char *path[MAX_NUM_PATHS];
	const char *subcommand;
	int rv = 0;
	/*
	 * getopt() returns an int.  Storing it in a plain char breaks the
	 * comparison against -1 wherever char is unsigned, turning the
	 * option loop into an infinite loop.
	 */
	int c;

	g_importargs.path = path;

	dprintf_setup(&argc, argv);
	zfs_prop_init();

	while ((c = getopt(argc, argv, "c:d:")) != -1) {
		switch (c) {
		case 'c':
			g_importargs.cachefile = optarg;
			break;
		case 'd':
			/*
			 * Checked unconditionally: an assert() disappears
			 * under NDEBUG and would let path[] overflow.
			 */
			if (g_importargs.paths >= MAX_NUM_PATHS) {
				fatal("too many -d directories (limit %d)",
				    MAX_NUM_PATHS);
			}
			g_importargs.path[g_importargs.paths++] = optarg;
			break;
		default:
			usage();
			break;
		}
	}

	argc -= optind;
	argv += optind;
	/* Subcommands parse their own options starting from index 1. */
	optind = 1;

	if (argc == 0) {
		(void) fprintf(stderr, "error: no command specified\n");
		usage();
	}

	subcommand = argv[0];

	if (strcmp(subcommand, "feature") == 0) {
		rv = zhack_do_feature(argc, argv);
	} else {
		(void) fprintf(stderr, "error: unknown subcommand: %s\n",
		    subcommand);
		usage();
	}

	if (!g_readonly && spa_export(g_pool, NULL, B_TRUE, B_TRUE) != 0) {
		fatal("pool export failed; "
		    "changes may not be committed to disk\n");
	}

	libzfs_fini(g_zfs);
	kernel_fini();

	return (rv);
}

View File

@ -0,0 +1,174 @@
'\" te
.\" Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>.
.\" All Rights Reserved.
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" Copyright (c) 2012 by Delphix. All rights reserved.
.\"
.\" $FreeBSD$
.\"
.Dd May 28, 2012
.Dt ZPOOL-FEATURES 5
.Os
.Sh NAME
.Nm zpool-features
.Nd ZFS pool feature descriptions
.Sh DESCRIPTION
ZFS pool on\-disk format versions are specified via "features" which replace
the old on\-disk format numbers (the last supported on\-disk format number is
28).
To enable a feature on a pool use the
.Xr zpool 8
command to set the
.Sy feature@feature_name
property to
.Ar enabled .
.Pp
The pool format does not affect file system version compatibility or the ability
to send file systems between pools.
.Pp
Since most features can be enabled independently of each other the on\-disk
format of the pool is specified by the set of all features marked as
.Sy active
on the pool. If the pool was created by another software version this set may
include unsupported features.
.Ss Identifying features
Every feature has a guid of the form
.Sy com.example:feature_name .
The reverse DNS name ensures that the feature's guid is unique across all ZFS
implementations. When unsupported features are encountered on a pool they will
be identified by their guids.
Refer to the documentation for the ZFS implementation that created the pool
for information about those features.
.Pp
Each supported feature also has a short name.
By convention a feature's short name is the portion of its guid which follows
the ':' (e.g.
.Sy com.example:feature_name
would have the short name
.Sy feature_name ),
however a feature's short name may differ across ZFS implementations if
following the convention would result in name conflicts.
.Ss Feature states
Features can be in one of three states:
.Bl -tag
.It Sy active
This feature's on\-disk format changes are in effect on the pool.
Support for this feature is required to import the pool in read\-write mode.
If this feature is not read-only compatible, support is also required to
import the pool in read\-only mode (see "Read\-only compatibility").
.It Sy enabled
An administrator has marked this feature as enabled on the pool, but the
feature's on\-disk format changes have not been made yet.
The pool can still be imported by software that does not support this feature,
but changes may be made to the on\-disk format at any time which will move
the feature to the
.Sy active
state.
Some features may support returning to the
.Sy enabled
state after becoming
.Sy active .
See feature\-specific documentation for details.
.It Sy disabled
This feature's on\-disk format changes have not been made and will not be made
unless an administrator moves the feature to the
.Sy enabled
state.
Features cannot be disabled once they have been enabled.
.El
The state of supported features is exposed through pool properties of the form
.Sy feature@short_name .
.Ss Read\-only compatibility
Some features may make on\-disk format changes that do not interfere with other
software's ability to read from the pool.
These features are referred to as "read\-only compatible".
If all unsupported features on a pool are read\-only compatible, the pool can
be imported in read\-only mode by setting the
.Sy readonly
property during import (see
.Xr zpool 8
for details on importing pools).
.Ss Unsupported features
For each unsupported feature enabled on an imported pool a pool property
named
.Sy unsupported@feature_guid
will indicate why the import was allowed despite the unsupported feature.
Possible values for this property are:
.Bl -tag
.It Sy inactive
The feature is in the
.Sy enabled
state and therefore the pool's on\-disk format is still compatible with
software that does not support this feature.
.It Sy readonly
The feature is read\-only compatible and the pool has been imported in
read\-only mode.
.El
.Ss Feature dependencies
Some features depend on other features being enabled in order to function
properly.
Enabling a feature will automatically enable any features it depends on.
.Sh FEATURES
The following features are supported on this system:
.Bl -tag
.It Sy async_destroy
.Bl -column "READ\-ONLY COMPATIBLE" "com.delphix:async_destroy"
.It GUID Ta com.delphix:async_destroy
.It READ\-ONLY COMPATIBLE Ta yes
.It DEPENDENCIES Ta none
.El
.Pp
Destroying a file system requires traversing all of its data in order to
return its used space to the pool.
Without
.Sy async_destroy
the file system is not fully removed until all space has been reclaimed.
If the destroy operation is interrupted by a reboot or power outage the next
attempt to open the pool will need to complete the destroy operation
synchronously.
.Pp
When
.Sy async_destroy
is enabled the file system's data will be reclaimed by a background process,
allowing the destroy operation to complete without traversing the entire file
system.
The background process is able to resume interrupted destroys after the pool
has been opened, eliminating the need to finish interrupted destroys as part
of the open operation.
The amount of space remaining to be reclaimed by the background process is
available through the
.Sy freeing
property.
.El
.Sh SEE ALSO
.Xr zpool 8
.Sh AUTHORS
This manual page is a
.Xr mdoc 7
reimplementation of the
.Tn illumos
manual page
.Em zpool-features(5) ,
modified and customized for
.Fx
and licensed under the Common Development and Distribution License
.Pq Tn CDDL .
.Pp
The
.Xr mdoc 7
implementation of this manual page was initially written by
.An Martin Matuska Aq mm@FreeBSD.org .

View File

@ -1,5 +1,5 @@
'\" te
.\" Copyright (c) 2011, Martin Matuska <mm@FreeBSD.org>.
.\" Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>.
.\" All Rights Reserved.
.\"
.\" The contents of this file are subject to the terms of the
@ -20,6 +20,7 @@
.\" Copyright (c) 2010, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright 2011, Nexenta Systems, Inc. All Rights Reserved.
.\" Copyright (c) 2011, Justin T. Gibbs <gibbs@FreeBSD.org>
.\" Copyright (c) 2012 by Delphix. All Rights Reserved.
.\"
.\" $FreeBSD$
.\"
@ -47,7 +48,7 @@
.Op Ar device
.Nm
.Cm create
.Op Fl fn
.Op Fl fnd
.Op Fl o Ar property Ns = Ns Ar value
.Ar ...
.Op Fl O Ar file-system-property Ns = Ns Ar value
@ -537,6 +538,16 @@ value of 1.76 indicates that 1.76 units of data were stored but only 1 unit of d
for a description of the deduplication feature.
.It Sy free
Number of blocks within the pool that are not allocated.
.It Sy freeing
After a file system or snapshot is destroyed, the space it was using is
returned to the pool asynchronously.
.Sy freeing
is the amount of space remaining to be reclaimed.
Over time
.Sy freeing
will decrease while
.Sy free
increases.
.It Sy expandsize
This property currently has no value on FreeBSD.
.It Sy guid
@ -552,11 +563,16 @@ or
.Qq Sy UNAVAIL .
.It Sy size
Total size of the storage pool.
.It Sy unsupported@ Ns Ar feature_guid
Information about unsupported features that are enabled on the pool.
See
.Xr zpool-features 5
for details.
.It Sy used
Amount of storage space used within the pool.
.El
.Pp
These space usage properties report actual physical space available to the
The space usage properties report actual physical space available to the
storage pool. The physical space can be different from the total amount of
space that any contained datasets can actually use. The amount of space used in
a
@ -653,6 +669,11 @@ Setting it to the special value
creates a temporary pool that is never cached, and the special value
.Cm ''
(empty string) uses the default location.
.It Sy comment Ns = Ns Ar text
A text string consisting of printable ASCII characters that will be stored
such that it is available even if the pool becomes faulted.
An administrator can provide additional information about a pool using this
property.
.It Sy dedupditto Ns = Ns Ar number
Threshold for the number of block ditto copies. If the reference count for a
deduplicated block increases above this number, a new ditto copy of this block
@ -686,6 +707,17 @@ requests that have yet to be committed to disk would be blocked.
.It Sy panic
Prints out a message to the console and generates a system crash dump.
.El
.It Sy feature@ Ns Ar feature_name Ns = Ns Sy enabled
The value of this property is the current state of
.Ar feature_name .
The only valid value when setting this property is
.Sy enabled
which moves
.Ar feature_name
to the enabled state.
See
.Xr zpool-features 5
for details on feature states.
.It Sy listsnaps Ns = Ns Cm on No | Cm off
Controls whether information about snapshots associated with this pool is
output when
@ -699,9 +731,9 @@ The current on-disk version of the pool. This can be increased, but never
decreased. The preferred method of updating pools is with the
.Qq Nm Cm upgrade
command, though this property can be used when a specific version is needed
for backwards compatibility. This property can be any number between 1 and the
current version reported by
.Qo Ic zpool upgrade -v Qc .
for backwards compatibility.
Once feature flags are enabled on a pool this property will no longer have a
value.
.El
.Sh SUBCOMMANDS
All subcommands that modify state are logged persistently to the pool in their
@ -810,7 +842,7 @@ do not actually discard any transactions.
.It Xo
.Nm
.Cm create
.Op Fl fn
.Op Fl fnd
.Op Fl o Ar property Ns = Ns Ar value
.Ar ...
.Op Fl O Ar file-system-property Ns = Ns Ar value
@ -859,6 +891,10 @@ The mount point must not exist or must be empty, or else the
root dataset cannot be mounted. This can be overridden with the
.Fl m
option.
.Pp
By default all supported features are enabled on the new pool unless the
.Fl d
option is specified.
.Bl -tag -width indent
.It Fl f
Forces use of
@ -869,6 +905,17 @@ Not all devices can be overridden in this manner.
Displays the configuration that would be used without actually creating the
pool. The actual pool creation can still fail due to insufficient privileges or
device sharing.
.It Fl d
Do not enable any features on the new pool.
Individual features can be enabled by setting their corresponding properties
to
.Sy enabled
with the
.Fl o
option.
See
.Xr zpool-features 5
for details about feature properties.
.It Xo
.Fl o Ar property Ns = Ns Ar value
.Op Fl o Ar property Ns = Ns Ar value
@ -1873,6 +1920,7 @@ Invalid command line options were specified.
.El
.Sh SEE ALSO
.Xr zfs 8
.Xr zpool-features 5
.Sh AUTHORS
This manual page is a
.Xr mdoc 7

View File

@ -54,6 +54,7 @@
#include "zpool_util.h"
#include "zfs_comutil.h"
#include "zfeature_common.h"
#include "statcommon.h"
@ -207,7 +208,7 @@ get_usage(zpool_help_t idx) {
case HELP_CLEAR:
return (gettext("\tclear [-nF] <pool> [device]\n"));
case HELP_CREATE:
return (gettext("\tcreate [-fn] [-o property=value] ... \n"
return (gettext("\tcreate [-fnd] [-o property=value] ... \n"
"\t [-O file-system-property=value] ... \n"
"\t [-m mountpoint] [-R root] <pool> <vdev> ...\n"));
case HELP_DESTROY:
@ -339,6 +340,12 @@ usage(boolean_t requested)
/* Iterate over all properties */
(void) zprop_iter(print_prop_cb, fp, B_FALSE, B_TRUE,
ZFS_TYPE_POOL);
(void) fprintf(fp, "\t%-15s ", "feature@...");
(void) fprintf(fp, "YES disabled | enabled | active\n");
(void) fprintf(fp, gettext("\nThe feature@ properties must be "
"appended with a feature name.\nSee zpool-features(5).\n"));
}
/*
@ -405,12 +412,16 @@ add_prop_list(const char *propname, char *propval, nvlist_t **props,
proplist = *props;
if (poolprop) {
if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL &&
!zpool_prop_feature(propname)) {
(void) fprintf(stderr, gettext("property '%s' is "
"not a valid pool property\n"), propname);
return (2);
}
normnm = zpool_prop_to_name(prop);
if (zpool_prop_feature(propname))
normnm = propname;
else
normnm = zpool_prop_to_name(prop);
} else {
if ((fprop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
normnm = zfs_prop_to_name(fprop);
@ -701,7 +712,7 @@ errout:
}
/*
* zpool create [-fn] [-o property=value] ...
* zpool create [-fnd] [-o property=value] ...
* [-O file-system-property=value] ...
* [-R root] [-m mountpoint] <pool> <dev> ...
*
@ -710,8 +721,10 @@ errout:
* were to be created.
* -R Create a pool under an alternate root
* -m Set default mountpoint for the root dataset. By default it's
* '/<pool>'
* '/<pool>'
* -o Set property=value.
* -d Don't automatically enable all supported pool features
* (individual features can be enabled with -o).
* -O Set fsproperty=value in the pool's root file system
*
* Creates the named pool according to the given vdev specification. The
@ -724,6 +737,7 @@ zpool_do_create(int argc, char **argv)
{
boolean_t force = B_FALSE;
boolean_t dryrun = B_FALSE;
boolean_t enable_all_pool_feat = B_TRUE;
int c;
nvlist_t *nvroot = NULL;
char *poolname;
@ -735,7 +749,7 @@ zpool_do_create(int argc, char **argv)
char *propval;
/* check options */
while ((c = getopt(argc, argv, ":fnR:m:o:O:")) != -1) {
while ((c = getopt(argc, argv, ":fndR:m:o:O:")) != -1) {
switch (c) {
case 'f':
force = B_TRUE;
@ -743,6 +757,9 @@ zpool_do_create(int argc, char **argv)
case 'n':
dryrun = B_TRUE;
break;
case 'd':
enable_all_pool_feat = B_FALSE;
break;
case 'R':
altroot = optarg;
if (add_prop_list(zpool_prop_to_name(
@ -770,6 +787,21 @@ zpool_do_create(int argc, char **argv)
if (add_prop_list(optarg, propval, &props, B_TRUE))
goto errout;
/*
* If the user is creating a pool that doesn't support
* feature flags, don't enable any features.
*/
if (zpool_name_to_prop(optarg) == ZPOOL_PROP_VERSION) {
char *end;
u_longlong_t ver;
ver = strtoull(propval, &end, 10);
if (*end == '\0' &&
ver < SPA_VERSION_FEATURES) {
enable_all_pool_feat = B_FALSE;
}
}
break;
case 'O':
if ((propval = strchr(optarg, '=')) == NULL) {
@ -835,7 +867,6 @@ zpool_do_create(int argc, char **argv)
goto errout;
}
if (altroot != NULL && altroot[0] != '/') {
(void) fprintf(stderr, gettext("invalid alternate root '%s': "
"must be an absolute path\n"), altroot);
@ -917,6 +948,27 @@ zpool_do_create(int argc, char **argv)
/*
* Hand off to libzfs.
*/
if (enable_all_pool_feat) {
int i;
for (i = 0; i < SPA_FEATURES; i++) {
char propname[MAXPATHLEN];
zfeature_info_t *feat = &spa_feature_table[i];
(void) snprintf(propname, sizeof (propname),
"feature@%s", feat->fi_uname);
/*
* Skip feature if user specified it manually
* on the command line.
*/
if (nvlist_exists(props, propname))
continue;
if (add_prop_list(propname, ZFS_FEATURE_ENABLED,
&props, B_TRUE) != 0)
goto errout;
}
}
if (zpool_create(g_zfs, poolname,
nvroot, props, fsprops) == 0) {
zfs_handle_t *pool = zfs_open(g_zfs, poolname,
@ -1249,6 +1301,10 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
(void) printf(gettext("newer version"));
break;
case VDEV_AUX_UNSUP_FEAT:
(void) printf(gettext("unsupported feature(s)"));
break;
case VDEV_AUX_SPARED:
verify(nvlist_lookup_uint64(nv, ZPOOL_CONFIG_GUID,
&cb.cb_guid) == 0);
@ -1366,6 +1422,10 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
(void) printf(gettext("newer version"));
break;
case VDEV_AUX_UNSUP_FEAT:
(void) printf(gettext("unsupported feature(s)"));
break;
case VDEV_AUX_ERR_EXCEEDED:
(void) printf(gettext("too many errors"));
break;
@ -1532,6 +1592,20 @@ show_import(nvlist_t *config)
"incompatible version.\n"));
break;
case ZPOOL_STATUS_UNSUP_FEAT_READ:
	/*
	 * The pool cannot be opened at all here; list the features this
	 * system lacks so the user knows what is required.
	 */
	(void) printf(gettext("status: The pool uses the following "
	    "feature(s) not supported on this system:\n"));
	zpool_print_unsup_feat(config);
	break;
case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
(void) printf(gettext("status: The pool can only be accessed "
"in read-only mode on this system. It\n\tcannot be "
"accessed in read-write mode because it uses the "
"following\n\tfeature(s) not supported on this system:\n"));
zpool_print_unsup_feat(config);
break;
case ZPOOL_STATUS_HOSTID_MISMATCH:
(void) printf(gettext(" status: The pool was last accessed by "
"another system.\n"));
@ -1589,6 +1663,20 @@ show_import(nvlist_t *config)
"newer\n\tsoftware, or recreate the pool from "
"backup.\n"));
break;
case ZPOOL_STATUS_UNSUP_FEAT_READ:
(void) printf(gettext("action: The pool cannot be "
"imported. Access the pool on a system that "
"supports\n\tthe required feature(s), or recreate "
"the pool from backup.\n"));
break;
case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
(void) printf(gettext("action: The pool cannot be "
"imported in read-write mode. Import the pool "
"with\n"
"\t\"-o readonly=on\", access the pool on a system "
"that supports the\n\trequired feature(s), or "
"recreate the pool from backup.\n"));
break;
case ZPOOL_STATUS_MISSING_DEV_R:
case ZPOOL_STATUS_MISSING_DEV_NR:
case ZPOOL_STATUS_BAD_GUID_SUM:
@ -1664,9 +1752,9 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
ZPOOL_CONFIG_POOL_STATE, &state) == 0);
verify(nvlist_lookup_uint64(config,
ZPOOL_CONFIG_VERSION, &version) == 0);
if (version > SPA_VERSION) {
if (!SPA_VERSION_IS_SUPPORTED(version)) {
(void) fprintf(stderr, gettext("cannot import '%s': pool "
"is formatted using a newer ZFS version\n"), name);
"is formatted using an unsupported ZFS version\n"), name);
return (1);
} else if (state != POOL_STATE_EXPORTED &&
!(flags & ZFS_IMPORT_ANY_HOST)) {
@ -2601,15 +2689,13 @@ static void
print_header(list_cbdata_t *cb)
{
zprop_list_t *pl = cb->cb_proplist;
char headerbuf[ZPOOL_MAXPROPLEN];
const char *header;
boolean_t first = B_TRUE;
boolean_t right_justify;
size_t width = 0;
for (; pl != NULL; pl = pl->pl_next) {
if (pl->pl_prop == ZPROP_INVAL)
continue;
width = pl->pl_width;
if (first && cb->cb_verbose) {
/*
@ -2624,8 +2710,18 @@ print_header(list_cbdata_t *cb)
else
first = B_FALSE;
header = zpool_prop_column_name(pl->pl_prop);
right_justify = zpool_prop_align_right(pl->pl_prop);
right_justify = B_FALSE;
if (pl->pl_prop != ZPROP_INVAL) {
header = zpool_prop_column_name(pl->pl_prop);
right_justify = zpool_prop_align_right(pl->pl_prop);
} else {
int i;
for (i = 0; pl->pl_user_prop[i] != '\0'; i++)
headerbuf[i] = toupper(pl->pl_user_prop[i]);
headerbuf[i] = '\0';
header = headerbuf;
}
if (pl->pl_next == NULL && !right_justify)
(void) printf("%s", header);
@ -2685,6 +2781,11 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb)
propstr = property;
right_justify = zpool_prop_align_right(pl->pl_prop);
} else if ((zpool_prop_feature(pl->pl_user_prop) ||
zpool_prop_unsupported(pl->pl_user_prop)) &&
zpool_prop_get_feature(zhp, pl->pl_user_prop, property,
sizeof (property)) == 0) {
propstr = property;
} else {
propstr = "-";
}
@ -4024,6 +4125,31 @@ status_callback(zpool_handle_t *zhp, void *data)
"backup.\n"));
break;
case ZPOOL_STATUS_UNSUP_FEAT_READ:
(void) printf(gettext("status: The pool cannot be accessed on "
"this system because it uses the\n\tfollowing feature(s) "
"not supported on this system:\n"));
zpool_print_unsup_feat(config);
(void) printf("\n");
(void) printf(gettext("action: Access the pool from a system "
"that supports the required feature(s),\n\tor restore the "
"pool from backup.\n"));
break;
case ZPOOL_STATUS_UNSUP_FEAT_WRITE:
(void) printf(gettext("status: The pool can only be accessed "
"in read-only mode on this system. It\n\tcannot be "
"accessed in read-write mode because it uses the "
"following\n\tfeature(s) not supported on this system:\n"));
zpool_print_unsup_feat(config);
(void) printf("\n");
(void) printf(gettext("action: The pool cannot be accessed in "
"read-write mode. Import the pool with\n"
"\t\"-o readonly=on\", access the pool from a system that "
"supports the\n\trequired feature(s), or restore the "
"pool from backup.\n"));
break;
case ZPOOL_STATUS_FAULTED_DEV_R:
(void) printf(gettext("status: One or more devices are "
"faulted in response to persistent errors.\n\tSufficient "
@ -4274,7 +4400,8 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
&version) == 0);
if (!cbp->cb_newer && version < SPA_VERSION) {
if (!cbp->cb_newer && SPA_VERSION_IS_SUPPORTED(version) &&
version != SPA_VERSION) {
if (!cbp->cb_all) {
if (cbp->cb_first) {
(void) printf(gettext("The following pools are "
@ -4303,13 +4430,14 @@ upgrade_cb(zpool_handle_t *zhp, void *arg)
}
}
}
} else if (cbp->cb_newer && version > SPA_VERSION) {
} else if (cbp->cb_newer && !SPA_VERSION_IS_SUPPORTED(version)) {
assert(!cbp->cb_all);
if (cbp->cb_first) {
(void) printf(gettext("The following pools are "
"formatted using a newer software version and\n"
"cannot be accessed on the current system.\n\n"));
"formatted using an unsupported software version "
"and\ncannot be accessed on the current "
"system.\n\n"));
(void) printf(gettext("VER POOL\n"));
(void) printf(gettext("--- ------------\n"));
cbp->cb_first = B_FALSE;
@ -4397,8 +4525,8 @@ zpool_do_upgrade(int argc, char **argv)
break;
case 'V':
cb.cb_version = strtoll(optarg, &end, 10);
if (*end != '\0' || cb.cb_version > SPA_VERSION ||
cb.cb_version < SPA_VERSION_1) {
if (*end != '\0' ||
!SPA_VERSION_IS_SUPPORTED(cb.cb_version)) {
(void) fprintf(stderr,
gettext("invalid version '%s'\n"), optarg);
usage(B_FALSE);
@ -4443,8 +4571,8 @@ zpool_do_upgrade(int argc, char **argv)
}
}
(void) printf(gettext("This system is currently running "
"ZFS pool version %llu.\n\n"), SPA_VERSION);
(void) printf(gettext("This system supports ZFS pool feature "
"flags.\n\n"));
cb.cb_first = B_TRUE;
if (showversions) {
(void) printf(gettext("The following versions are "
@ -4705,13 +4833,26 @@ get_callback(zpool_handle_t *zhp, void *data)
pl == cbp->cb_proplist)
continue;
if (zpool_get_prop(zhp, pl->pl_prop,
value, sizeof (value), &srctype) != 0)
continue;
if (pl->pl_prop == ZPROP_INVAL &&
(zpool_prop_feature(pl->pl_user_prop) ||
zpool_prop_unsupported(pl->pl_user_prop))) {
srctype = ZPROP_SRC_LOCAL;
zprop_print_one_property(zpool_get_name(zhp), cbp,
zpool_prop_to_name(pl->pl_prop), value, srctype, NULL,
NULL);
if (zpool_prop_get_feature(zhp, pl->pl_user_prop,
value, sizeof (value)) == 0) {
zprop_print_one_property(zpool_get_name(zhp),
cbp, pl->pl_user_prop, value, srctype,
NULL, NULL);
}
} else {
if (zpool_get_prop(zhp, pl->pl_prop, value,
sizeof (value), &srctype) != 0)
continue;
zprop_print_one_property(zpool_get_name(zhp), cbp,
zpool_prop_to_name(pl->pl_prop), value, srctype,
NULL, NULL);
}
}
return (0);
}
@ -4723,8 +4864,11 @@ zpool_do_get(int argc, char **argv)
zprop_list_t fake_name = { 0 };
int ret;
if (argc < 3)
if (argc < 2) {
(void) fprintf(stderr, gettext("missing property "
"argument\n"));
usage(B_FALSE);
}
cb.cb_first = B_TRUE;
cb.cb_sources = ZPROP_SRC_ALL;
@ -4734,7 +4878,7 @@ zpool_do_get(int argc, char **argv)
cb.cb_columns[3] = GET_COL_SOURCE;
cb.cb_type = ZFS_TYPE_POOL;
if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist,
if (zprop_get_list(g_zfs, argv[1], &cb.cb_proplist,
ZFS_TYPE_POOL) != 0)
usage(B_FALSE);

View File

@ -107,6 +107,7 @@
#include <sys/dsl_scan.h>
#include <sys/zio_checksum.h>
#include <sys/refcount.h>
#include <sys/zfeature.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
@ -5575,10 +5576,9 @@ make_random_props()
{
nvlist_t *props;
if (ztest_random(2) == 0)
return (NULL);
VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
if (ztest_random(2) == 0)
return (props);
VERIFY(nvlist_add_uint64(props, "autoreplace", 1) == 0);
return (props);
@ -5609,6 +5609,12 @@ ztest_init(ztest_shared_t *zs)
nvroot = make_vdev_root(NULL, NULL, ztest_opts.zo_vdev_size, 0,
0, ztest_opts.zo_raidz, zs->zs_mirrors, 1);
props = make_random_props();
for (int i = 0; i < SPA_FEATURES; i++) {
char buf[1024];
(void) snprintf(buf, sizeof (buf), "feature@%s",
spa_feature_table[i].fi_uname);
VERIFY3U(0, ==, nvlist_add_uint64(props, buf, 0));
}
VERIFY3U(0, ==, spa_create(ztest_opts.zo_pool, nvroot, props,
NULL, NULL));
nvlist_free(nvroot);
@ -5616,6 +5622,7 @@ ztest_init(ztest_shared_t *zs)
VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
zs->zs_metaslab_sz =
1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
spa_close(spa, FTAG);
kernel_fini();
@ -5654,9 +5661,24 @@ setup_fds(void)
ASSERT3U(fd, ==, ZTEST_FD_RAND);
}
/*
 * Return the total size, in bytes, of the shared data region described by
 * hdr: the header, the options block, the ztest_shared_t block, plus the
 * stats and ds arrays (element size times element count for each).
 */
static int
shared_data_size(ztest_shared_hdr_t *hdr)
{
	int total = hdr->zh_hdr_size + hdr->zh_opts_size + hdr->zh_size +
	    hdr->zh_stats_size * hdr->zh_stats_count +
	    hdr->zh_ds_size * hdr->zh_ds_count;

	return (total);
}
static void
setup_hdr(void)
{
int size;
ztest_shared_hdr_t *hdr;
#ifndef illumos
@ -5667,6 +5689,8 @@ setup_hdr(void)
PROT_READ | PROT_WRITE, MAP_SHARED, ZTEST_FD_DATA, 0);
ASSERT(hdr != MAP_FAILED);
VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, sizeof (ztest_shared_hdr_t)));
hdr->zh_hdr_size = sizeof (ztest_shared_hdr_t);
hdr->zh_opts_size = sizeof (ztest_shared_opts_t);
hdr->zh_size = sizeof (ztest_shared_t);
@ -5675,6 +5699,9 @@ setup_hdr(void)
hdr->zh_ds_size = sizeof (ztest_shared_ds_t);
hdr->zh_ds_count = ztest_opts.zo_datasets;
size = shared_data_size(hdr);
VERIFY3U(0, ==, ftruncate(ZTEST_FD_DATA, size));
(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
}
@ -5689,11 +5716,7 @@ setup_data(void)
PROT_READ, MAP_SHARED, ZTEST_FD_DATA, 0);
ASSERT(hdr != MAP_FAILED);
size = hdr->zh_hdr_size;
size += hdr->zh_opts_size;
size += hdr->zh_size;
size += hdr->zh_stats_size * hdr->zh_stats_count;
size += hdr->zh_ds_size * hdr->zh_ds_count;
size = shared_data_size(hdr);
(void) munmap((caddr_t)hdr, P2ROUNDUP(sizeof (*hdr), getpagesize()));
hdr = ztest_shared_hdr = (void *)mmap(0, P2ROUNDUP(size, getpagesize()),

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <solaris.h>
@ -802,6 +803,10 @@ dump_nvlist(nvlist_t *list, int indent)
while ((elem = nvlist_next_nvpair(list, elem)) != NULL) {
switch (nvpair_type(elem)) {
case DATA_TYPE_BOOLEAN:
(void) printf("%*s%s\n", indent, "", nvpair_name(elem));
break;
case DATA_TYPE_BOOLEAN_VALUE:
(void) nvpair_value_boolean_value(elem, &bool_value);
(void) printf("%*s%s: %s\n", indent, "",

View File

@ -294,6 +294,15 @@ typedef enum {
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
/*
* If the pool has unsupported features but can still be opened in
* read-only mode, its status is ZPOOL_STATUS_UNSUP_FEAT_WRITE. If the
* pool has unsupported features but cannot be opened at all, its
* status is ZPOOL_STATUS_UNSUP_FEAT_READ.
*/
ZPOOL_STATUS_UNSUP_FEAT_READ, /* unsupported features for read */
ZPOOL_STATUS_UNSUP_FEAT_WRITE, /* unsupported features for write */
/*
* These faults have no corresponding message ID. At the time we are
* checking the status, the original reason for the FMA fault (I/O or
@ -326,6 +335,7 @@ extern void zpool_dump_ddt(const ddt_stat_t *dds, const ddt_histogram_t *ddh);
* Statistics and configuration functions.
*/
extern nvlist_t *zpool_get_config(zpool_handle_t *, nvlist_t **);
extern nvlist_t *zpool_get_features(zpool_handle_t *);
extern int zpool_refresh_stats(zpool_handle_t *, boolean_t *);
extern int zpool_get_errlog(zpool_handle_t *, nvlist_t **);
@ -338,6 +348,7 @@ extern int zpool_import(libzfs_handle_t *, nvlist_t *, const char *,
char *altroot);
extern int zpool_import_props(libzfs_handle_t *, nvlist_t *, const char *,
nvlist_t *, int);
extern void zpool_print_unsup_feat(nvlist_t *config);
/*
* Search for pools to import
@ -427,6 +438,8 @@ extern int zfs_prop_get_written_int(zfs_handle_t *zhp, const char *propname,
uint64_t *propvalue);
extern int zfs_prop_get_written(zfs_handle_t *zhp, const char *propname,
char *propbuf, int proplen, boolean_t literal);
extern int zfs_prop_get_feature(zfs_handle_t *zhp, const char *propname,
char *buf, size_t len);
extern int zfs_get_snapused_int(zfs_handle_t *firstsnap, zfs_handle_t *lastsnap,
uint64_t *usedp);
extern uint64_t zfs_prop_get_int(zfs_handle_t *, zfs_prop_t);
@ -454,10 +467,19 @@ extern void zfs_prune_proplist(zfs_handle_t *, uint8_t *);
#define ZFS_MOUNTPOINT_NONE "none"
#define ZFS_MOUNTPOINT_LEGACY "legacy"
#define ZFS_FEATURE_DISABLED "disabled"
#define ZFS_FEATURE_ENABLED "enabled"
#define ZFS_FEATURE_ACTIVE "active"
#define ZFS_UNSUPPORTED_INACTIVE "inactive"
#define ZFS_UNSUPPORTED_READONLY "readonly"
/*
* zpool property management
*/
extern int zpool_expand_proplist(zpool_handle_t *, zprop_list_t **);
extern int zpool_prop_get_feature(zpool_handle_t *, const char *, char *,
size_t);
extern const char *zpool_prop_default_string(zpool_prop_t);
extern uint64_t zpool_prop_default_numeric(zpool_prop_t);
extern const char *zpool_prop_column_name(zpool_prop_t);

View File

@ -18,11 +18,16 @@
*
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
* The pool configuration repository is stored in /etc/zfs/zpool.cache as a
* single packed nvlist. While it would be nice to just read in this
@ -217,6 +222,36 @@ zpool_get_config(zpool_handle_t *zhp, nvlist_t **oldconfig)
return (zhp->zpool_config);
}
/*
 * Retrieves a list of enabled features and their refcounts and caches it in
 * the pool handle.
 *
 * If the cached config is missing or carries no feature stats, the pool
 * stats are refreshed once and the lookup is retried.
 *
 * Returns the feature-stats nvlist (owned by the cached config, not by the
 * caller), or NULL if the refresh fails, the pool has gone missing, or the
 * config still contains no feature stats after the refresh.
 */
nvlist_t *
zpool_get_features(zpool_handle_t *zhp)
{
	nvlist_t *config, *features;

	config = zpool_get_config(zhp, NULL);

	if (config == NULL || !nvlist_exists(config,
	    ZPOOL_CONFIG_FEATURE_STATS)) {
		int error;
		boolean_t missing = B_FALSE;

		error = zpool_refresh_stats(zhp, &missing);

		if (error != 0 || missing)
			return (NULL);

		config = zpool_get_config(zhp, NULL);
	}

	/*
	 * A refreshed config is not guaranteed to carry feature stats, so
	 * report the absence to the caller rather than aborting the
	 * process; NULL is already a documented failure result above.
	 */
	if (config == NULL || nvlist_lookup_nvlist(config,
	    ZPOOL_CONFIG_FEATURE_STATS, &features) != 0)
		return (NULL);

	return (features);
}
/*
* Refresh the vdev statistics associated with the given pool. This is used in
* iostat to show configuration changes and determine the delta from the last

View File

@ -43,6 +43,7 @@
#include "zfs_prop.h"
#include "libzfs_impl.h"
#include "zfs_comutil.h"
#include "zfeature_common.h"
static int read_efi_label(nvlist_t *config, diskaddr_t *sb);
@ -301,6 +302,7 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
case ZPOOL_PROP_SIZE:
case ZPOOL_PROP_ALLOCATED:
case ZPOOL_PROP_FREE:
case ZPOOL_PROP_FREEING:
case ZPOOL_PROP_EXPANDSZ:
(void) zfs_nicenum(intval, buf, len);
break;
@ -326,6 +328,12 @@ zpool_get_prop(zpool_handle_t *zhp, zpool_prop_t prop, char *buf, size_t len,
(void) strlcpy(buf, zpool_state_to_name(intval,
vs->vs_aux), len);
break;
case ZPOOL_PROP_VERSION:
if (intval >= SPA_VERSION_FEATURES) {
(void) snprintf(buf, len, "-");
break;
}
/* FALLTHROUGH */
default:
(void) snprintf(buf, len, "%llu", intval);
}
@ -430,10 +438,48 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
const char *propname = nvpair_name(elem);
prop = zpool_name_to_prop(propname);
if (prop == ZPROP_INVAL && zpool_prop_feature(propname)) {
int err;
zfeature_info_t *feature;
char *fname = strchr(propname, '@') + 1;
err = zfeature_lookup_name(fname, &feature);
if (err != 0) {
ASSERT3U(err, ==, ENOENT);
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid feature '%s'"), fname);
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error;
}
if (nvpair_type(elem) != DATA_TYPE_STRING) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' must be a string"), propname);
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error;
}
(void) nvpair_value_string(elem, &strval);
if (strcmp(strval, ZFS_FEATURE_ENABLED) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"property '%s' can only be set to "
"'enabled'"), propname);
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto error;
}
if (nvlist_add_uint64(retprops, propname, 0) != 0) {
(void) no_memory(hdl);
goto error;
}
continue;
}
/*
* Make sure this property is valid and applies to this type.
*/
if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL) {
if (prop == ZPROP_INVAL) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid property '%s'"), propname);
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
@ -456,7 +502,8 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
*/
switch (prop) {
case ZPOOL_PROP_VERSION:
if (intval < version || intval > SPA_VERSION) {
if (intval < version ||
!SPA_VERSION_IS_SUPPORTED(intval)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"property '%s' number %d is invalid."),
propname, intval);
@ -680,10 +727,77 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
libzfs_handle_t *hdl = zhp->zpool_hdl;
zprop_list_t *entry;
char buf[ZFS_MAXPROPLEN];
nvlist_t *features = NULL;
zprop_list_t **last;
boolean_t firstexpand = (NULL == *plp);
if (zprop_expand_list(hdl, plp, ZFS_TYPE_POOL) != 0)
return (-1);
last = plp;
while (*last != NULL)
last = &(*last)->pl_next;
if ((*plp)->pl_all)
features = zpool_get_features(zhp);
if ((*plp)->pl_all && firstexpand) {
for (int i = 0; i < SPA_FEATURES; i++) {
zprop_list_t *entry = zfs_alloc(hdl,
sizeof (zprop_list_t));
entry->pl_prop = ZPROP_INVAL;
entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
spa_feature_table[i].fi_uname);
entry->pl_width = strlen(entry->pl_user_prop);
entry->pl_all = B_TRUE;
*last = entry;
last = &entry->pl_next;
}
}
/* add any unsupported features */
for (nvpair_t *nvp = nvlist_next_nvpair(features, NULL);
nvp != NULL; nvp = nvlist_next_nvpair(features, nvp)) {
char *propname;
boolean_t found;
zprop_list_t *entry;
if (zfeature_is_supported(nvpair_name(nvp)))
continue;
propname = zfs_asprintf(hdl, "unsupported@%s",
nvpair_name(nvp));
/*
* Before adding the property to the list make sure that no
* other pool already added the same property.
*/
found = B_FALSE;
entry = *plp;
while (entry != NULL) {
if (entry->pl_user_prop != NULL &&
strcmp(propname, entry->pl_user_prop) == 0) {
found = B_TRUE;
break;
}
entry = entry->pl_next;
}
if (found) {
free(propname);
continue;
}
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
entry->pl_prop = ZPROP_INVAL;
entry->pl_user_prop = propname;
entry->pl_width = strlen(entry->pl_user_prop);
entry->pl_all = B_TRUE;
*last = entry;
last = &entry->pl_next;
}
for (entry = *plp; entry != NULL; entry = entry->pl_next) {
if (entry->pl_fixed)
@ -700,6 +814,66 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp)
return (0);
}
/*
 * Get the state for the given feature on the given ZFS pool.
 *
 * propname must be either a "feature@<name>" or an "unsupported@<guid>"
 * property. The textual state is written to buf, never using more than len
 * bytes:
 *
 *   feature@...      "disabled" if the pool does not list the feature,
 *                    "enabled" if its refcount is 0, "active" otherwise.
 *   unsupported@...  "inactive" if its refcount is 0, "readonly" otherwise.
 *
 * Returns 0 on success. Returns ENOTSUP (with "-" stored in buf) when a
 * feature@ name is unknown to this software or an unsupported@ guid is not
 * present on the pool.
 */
int
zpool_prop_get_feature(zpool_handle_t *zhp, const char *propname, char *buf,
    size_t len)
{
	uint64_t refcount;
	boolean_t found = B_FALSE;
	nvlist_t *features = zpool_get_features(zhp);
	boolean_t supported;
	const char *feature = strchr(propname, '@') + 1;

	supported = zpool_prop_feature(propname);
	ASSERT(supported || zpool_prop_unsupported(propname));

	/*
	 * Convert from feature name to feature guid. This conversion is
	 * unnecessary for unsupported@... properties because they already
	 * use guids.
	 */
	if (supported) {
		int ret;
		zfeature_info_t *fi;

		ret = zfeature_lookup_name(feature, &fi);
		if (ret != 0) {
			(void) strlcpy(buf, "-", len);
			return (ENOTSUP);
		}
		feature = fi->fi_guid;
	}

	if (nvlist_lookup_uint64(features, feature, &refcount) == 0)
		found = B_TRUE;

	if (supported) {
		if (!found) {
			(void) strlcpy(buf, ZFS_FEATURE_DISABLED, len);
		} else {
			if (refcount == 0)
				(void) strlcpy(buf, ZFS_FEATURE_ENABLED, len);
			else
				(void) strlcpy(buf, ZFS_FEATURE_ACTIVE, len);
		}
	} else {
		if (found) {
			/*
			 * Bounded copies: the caller's buffer size is len,
			 * so honor it here exactly as every other write in
			 * this function does.
			 */
			if (refcount == 0) {
				(void) strlcpy(buf, ZFS_UNSUPPORTED_INACTIVE,
				    len);
			} else {
				(void) strlcpy(buf, ZFS_UNSUPPORTED_READONLY,
				    len);
			}
		} else {
			(void) strlcpy(buf, "-", len);
			return (ENOTSUP);
		}
	}

	return (0);
}
/*
* Don't start the slice at the default block of 34; many storage
@ -1286,8 +1460,10 @@ zpool_rewind_exclaim(libzfs_handle_t *hdl, const char *name, boolean_t dryrun,
if (!hdl->libzfs_printerr || config == NULL)
return;
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0)
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0) {
return;
}
if (nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
return;
@ -1343,6 +1519,7 @@ zpool_explain_recover(libzfs_handle_t *hdl, const char *name, int reason,
/* All attempted rewinds failed if ZPOOL_CONFIG_LOAD_TIME missing */
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO, &nv) != 0 ||
nvlist_lookup_nvlist(nv, ZPOOL_CONFIG_REWIND_INFO, &nv) != 0 ||
nvlist_lookup_uint64(nv, ZPOOL_CONFIG_LOAD_TIME, &rewindto) != 0)
goto no_info;
@ -1465,6 +1642,30 @@ print_vdev_tree(libzfs_handle_t *hdl, const char *name, nvlist_t *nv,
}
}
/*
 * Print, one per line and tab-indented, every entry of the unsupported
 * feature list found under the load info of the given pool config. An entry
 * with a non-empty description is printed as "name (description)", otherwise
 * just the name is printed.
 *
 * The config must contain load info with an unsupported-feature list whose
 * values are all strings; this is enforced with verify().
 */
void
zpool_print_unsup_feat(nvlist_t *config)
{
	nvlist_t *load_info, *unsup;
	nvpair_t *pair = NULL;

	verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
	    &load_info) == 0);
	verify(nvlist_lookup_nvlist(load_info, ZPOOL_CONFIG_UNSUP_FEAT,
	    &unsup) == 0);

	while ((pair = nvlist_next_nvpair(unsup, pair)) != NULL) {
		char *desc;

		verify(nvpair_type(pair) == DATA_TYPE_STRING);
		verify(nvpair_value_string(pair, &desc) == 0);

		if (desc[0] == '\0')
			(void) printf("\t%s\n", nvpair_name(pair));
		else
			(void) printf("\t%s (%s)\n", nvpair_name(pair), desc);
	}
}
/*
* Import the given pool using the known configuration and a list of
* properties to be set. The configuration should have come from
@ -1571,6 +1772,22 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
switch (error) {
case ENOTSUP:
if (nv != NULL && nvlist_lookup_nvlist(nv,
ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0 &&
nvlist_exists(nvinfo, ZPOOL_CONFIG_UNSUP_FEAT)) {
(void) printf(dgettext(TEXT_DOMAIN, "This "
"pool uses the following feature(s) not "
"supported by this system:\n"));
zpool_print_unsup_feat(nv);
if (nvlist_exists(nvinfo,
ZPOOL_CONFIG_CAN_RDONLY)) {
(void) printf(dgettext(TEXT_DOMAIN,
"All unsupported features are only "
"required for writing to the pool."
"\nThe pool can be imported using "
"'-o readonly=on'.\n"));
}
}
/*
* Unsupported version.
*/

View File

@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -213,6 +215,20 @@ check_status(nvlist_t *config, boolean_t isimport)
vs->vs_aux == VDEV_AUX_VERSION_NEWER)
return (ZPOOL_STATUS_VERSION_NEWER);
/*
* Unsupported feature(s).
*/
if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
vs->vs_aux == VDEV_AUX_UNSUP_FEAT) {
nvlist_t *nvinfo;
verify(nvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
&nvinfo) == 0);
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_CAN_RDONLY))
return (ZPOOL_STATUS_UNSUP_FEAT_WRITE);
return (ZPOOL_STATUS_UNSUP_FEAT_READ);
}
/*
* Check that the config is complete.
*/
@ -300,7 +316,7 @@ check_status(nvlist_t *config, boolean_t isimport)
/*
* Outdated, but usable, version
*/
if (version < SPA_VERSION)
if (SPA_VERSION_IS_SUPPORTED(version) && version != SPA_VERSION)
return (ZPOOL_STATUS_VERSION_OLDER);
return (ZPOOL_STATUS_OK);

View File

@ -18,9 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -50,6 +51,7 @@
#include "libzfs_impl.h"
#include "zfs_prop.h"
#include "zfeature_common.h"
int aok;
@ -119,7 +121,8 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_RESILVERING:
return (dgettext(TEXT_DOMAIN, "currently resilvering"));
case EZFS_BADVERSION:
return (dgettext(TEXT_DOMAIN, "unsupported version"));
return (dgettext(TEXT_DOMAIN, "unsupported version or "
"feature"));
case EZFS_POOLUNAVAIL:
return (dgettext(TEXT_DOMAIN, "pool is unavailable"));
case EZFS_DEVOVERFLOW:
@ -656,6 +659,7 @@ libzfs_init(void)
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
libzfs_mnttab_init(hdl);
return (hdl);
@ -1325,9 +1329,11 @@ addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
* this is a pool property or if this isn't a user-defined
* dataset property,
*/
if (prop == ZPROP_INVAL && (type == ZFS_TYPE_POOL ||
(!zfs_prop_user(propname) && !zfs_prop_userquota(propname) &&
!zfs_prop_written(propname)))) {
if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
!zpool_prop_feature(propname) &&
!zpool_prop_unsupported(propname)) ||
(type == ZFS_TYPE_DATASET && !zfs_prop_user(propname) &&
!zfs_prop_userquota(propname) && !zfs_prop_written(propname)))) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid property '%s'"), propname);
return (zfs_error(hdl, EZFS_BADPROP,
@ -1339,7 +1345,8 @@ addlist(libzfs_handle_t *hdl, char *propname, zprop_list_t **listp,
entry->pl_prop = prop;
if (prop == ZPROP_INVAL) {
if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) == NULL) {
if ((entry->pl_user_prop = zfs_strdup(hdl, propname)) ==
NULL) {
free(entry);
return (-1);
}

View File

@ -474,7 +474,9 @@ vn_rdwr(int uio, vnode_t *vp, void *addr, ssize_t len, offset_t offset,
* To simulate partial disk writes, we split writes into two
* system calls so that the process can be killed in between.
*/
split = (len > 0 ? rand() % len : 0);
int sectors = len >> SPA_MINBLOCKSHIFT;
split = (sectors > 0 ? rand() % sectors : 0) <<
SPA_MINBLOCKSHIFT;
iolen = pwrite64(vp->v_fd, addr, split, offset);
iolen += pwrite64(vp->v_fd, (char *)addr + split,
len - split, offset + split);

View File

@ -8,12 +8,17 @@ LIB= nvpair
SRCS= libnvpair.c \
nvpair_alloc_system.c \
nvpair_alloc_fixed.c \
nvpair.c
nvpair.c \
fnvpair.c
WARNS?= 0
CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
.include <bsd.lib.mk>

View File

@ -27,6 +27,7 @@ SRCS+= libzfs_changelist.c \
libzfs_sendrecv.c \
libzfs_status.c \
libzfs_util.c \
zfeature_common.c \
zfs_comutil.c \
zfs_deleg.c \
zfs_fletcher.c \

View File

@ -5,7 +5,7 @@
.PATH: ${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
PROG= zpool
MAN= zpool.8
MAN= zpool.8 zpool-features.5
SRCS= zpool_main.c zpool_vdev.c zpool_iter.c zpool_util.c zfs_comutil.c
SRCS+= timestamp.c

View File

@ -11,15 +11,16 @@ CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/include
CFLAGS+= -I${.CURDIR}/../../compat/opensolaris/lib/libumem
CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
DPADD= ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
${LIBPTHREAD} ${LIBAVL}
LDADD= -lm -lnvpair -lumem -lzpool -lpthread -lavl
DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBUMEM} ${LIBZPOOL} \
${LIBPTHREAD} ${LIBAVL} ${LIBZFS} ${LIBUUTIL}
LDADD= -lgeom -lm -lnvpair -lumem -lzpool -lpthread -lavl -lzfs -luutil
CSTD= c99

View File

@ -5,11 +5,13 @@
SUBDIR= ${_dtrace} \
${_dtruss} \
${_lockstat} \
${_zdb}
${_zdb} \
${_zhack}
.if ${MK_ZFS} != "no"
.if ${MK_LIBTHR} != "no"
_zdb= zdb
_zhack= zhack
.endif
.endif

View File

@ -0,0 +1,29 @@
# $FreeBSD$
.PATH: ${.CURDIR}/../../../cddl/contrib/opensolaris/cmd/zhack
PROG= zhack
NO_MAN=
WARNS?= 0
CSTD= c99
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/compat/opensolaris
CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/include
CFLAGS+= -I${.CURDIR}/../../../cddl/compat/opensolaris/lib/libumem
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libnvpair
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libuutil/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzfs/common
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/lib/libzpool/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/fs/zfs
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/uts/common/sys
CFLAGS+= -I${.CURDIR}/../../../sys/cddl/contrib/opensolaris/common/zfs
CFLAGS+= -I${.CURDIR}/../../../cddl/contrib/opensolaris/head
CFLAGS+= -I${.CURDIR}/../../lib/libumem
DPADD= ${LIBGEOM} ${LIBM} ${LIBNVPAIR} ${LIBPTHREAD} ${LIBUMEM} \
${LIBUUTIL} ${LIBZFS} ${LIBZPOOL}
LDADD= -lgeom -lm -lnvpair -lpthread -lumem -luutil -lzfs -lzpool
.include <bsd.prog.mk>

View File

@ -49,6 +49,13 @@ struct zfsmount {
*/
static vdev_list_t zfs_vdevs;
/*
* List of ZFS features supported for read
*/
static const char *features_for_read[] = {
NULL
};
/*
* List of all pools, chained through spa_link.
*/
@ -198,6 +205,57 @@ nvlist_find(const unsigned char *nvlist, const char *name, int type,
return (EIO);
}
/*
 * Walk an XDR-encoded nvlist of features required for reading the pool and
 * check every pair name against features_for_read[].
 *
 * The two leading ints of the list are skipped. Each pair then starts with
 * its encoded and decoded sizes, followed by the name length, the name
 * (padded to a multiple of 4 bytes) and the pair type; a pair with a zero
 * size ends the list.
 *
 * Every unknown feature is reported on the console. Returns 0 if all
 * features are known, EIO if at least one is not.
 */
static int
nvlist_check_features_for_read(const unsigned char *nvlist)
{
	const unsigned char *p, *pair;
	int junk;
	int encoded_size, decoded_size;
	int rc;

	rc = 0;

	p = nvlist;
	xdr_int(&p, &junk);
	xdr_int(&p, &junk);

	pair = p;
	xdr_int(&p, &encoded_size);
	xdr_int(&p, &decoded_size);
	while (encoded_size && decoded_size) {
		int namelen, pairtype;
		const char *pairname;
		int i, found;

		found = 0;

		xdr_int(&p, &namelen);
		pairname = (const char *)p;
		p += roundup(namelen, 4);
		xdr_int(&p, &pairtype);

		for (i = 0; features_for_read[i] != NULL; i++) {
			/*
			 * The encoded name is delimited by namelen, not by a
			 * terminator. Require equal lengths so that a name
			 * which is merely a prefix of a supported feature is
			 * not accepted, and so that memcmp() never reads
			 * past the end of a shorter table entry.
			 */
			if (namelen >= 0 &&
			    strlen(features_for_read[i]) == (size_t)namelen &&
			    memcmp(pairname, features_for_read[i],
			    namelen) == 0) {
				found = 1;
				break;
			}
		}

		if (!found) {
			/* Bound the output: the name need not be terminated. */
			printf("ZFS: unsupported feature: %.*s\n",
			    namelen, pairname);
			rc = EIO;
		}

		/* Advance by the pair's encoded size to the next pair. */
		p = pair + encoded_size;

		pair = p;
		xdr_int(&p, &encoded_size);
		xdr_int(&p, &decoded_size);
	}

	return (rc);
}
/*
* Return the next nvlist in an nvlist array.
*/
@ -788,6 +846,7 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
uint64_t is_log;
const char *pool_name;
const unsigned char *vdevs;
const unsigned char *features;
int i, rc, is_newer;
char *upbuf;
const struct uberblock *up;
@ -822,12 +881,18 @@ vdev_probe(vdev_phys_read_t *read, void *read_priv, spa_t **spap)
return (EIO);
}
if (val > SPA_VERSION) {
if (!SPA_VERSION_IS_SUPPORTED(val)) {
printf("ZFS: unsupported ZFS version %u (should be %u)\n",
(unsigned) val, (unsigned) SPA_VERSION);
return (EIO);
}
/* Check ZFS features for read */
rc = nvlist_find(nvlist, ZPOOL_CONFIG_FEATURES_FOR_READ,
DATA_TYPE_NVLIST, 0, &features);
if (nvlist_check_features_for_read(features) != 0)
return (EIO);
if (nvlist_find(nvlist,
ZPOOL_CONFIG_POOL_STATE,
DATA_TYPE_UINT64, 0, &val)) {

View File

@ -53,6 +53,8 @@
* Use is subject to license terms.
*/
#define MAXNAMELEN 256
/* CRC64 table */
#define ZFS_CRC64_POLY 0xC96C5795D7870F42ULL /* ECMA-182, reflected form */
@ -508,6 +510,7 @@ typedef enum {
#define SPA_VERSION_26 26ULL
#define SPA_VERSION_27 27ULL
#define SPA_VERSION_28 28ULL
#define SPA_VERSION_5000 5000ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
@ -515,8 +518,8 @@ typedef enum {
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
#define SPA_VERSION SPA_VERSION_28
#define SPA_VERSION_STRING "28"
#define SPA_VERSION SPA_VERSION_5000
#define SPA_VERSION_STRING "5000"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@ -567,6 +570,12 @@ typedef enum {
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
#define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28
#define SPA_VERSION_FEATURES SPA_VERSION_5000
/*
 * True when pool version v lies in one of the two accepted ranges:
 * SPA_VERSION_INITIAL..SPA_VERSION_BEFORE_FEATURES (legacy numbering) or
 * SPA_VERSION_FEATURES..SPA_VERSION (feature flags).  Values between the
 * two ranges are rejected.  Note that v is evaluated more than once.
 */
#define SPA_VERSION_IS_SUPPORTED(v) \
(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
/*
* The following are configuration names used in the nvlist describing a pool's
@ -602,6 +611,7 @@ typedef enum {
#define ZPOOL_CONFIG_HOSTNAME "hostname"
#define ZPOOL_CONFIG_IS_LOG "is_log"
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
/*
* The persistent vdev state is stored as separate values rather than a single

View File

@ -0,0 +1,498 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/nvpair.h>
#ifndef _KERNEL
#include <sys/zfs_context.h>
#else
#include <sys/debug.h>
#include <sys/kmem.h>
#endif
/*
* "Force" nvlist wrapper.
*
* These functions wrap the nvlist_* functions with assertions that assume
* the operation is successful. This allows the caller's code to be much
* more readable, especially for the fnvlist_lookup_* and fnvpair_value_*
* functions, which can return the requested value (rather than filling in
* a pointer).
*
* These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate
* with KM_SLEEP.
*
* More wrappers should be added as needed -- for example
* nvlist_lookup_*_array and nvpair_value_*_array.
*/
/*
 * Allocate a new nvlist with NV_UNIQUE_NAME and KM_SLEEP and return it.
 * nvlist_alloc() is asserted (VERIFY3U) to return 0.
 */
nvlist_t *
fnvlist_alloc(void)
{
nvlist_t *nvl;
VERIFY3U(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP), ==, 0);
return (nvl);
}
/* Release nvl; a plain pass-through to nvlist_free(). */
void
fnvlist_free(nvlist_t *nvl)
{
nvlist_free(nvl);
}
/*
 * Return the size nvlist_size() reports for nvl under NV_ENCODE_NATIVE.
 * The call is asserted (VERIFY3U) to return 0.
 */
size_t
fnvlist_size(nvlist_t *nvl)
{
size_t size;
VERIFY3U(nvlist_size(nvl, &size, NV_ENCODE_NATIVE), ==, 0);
return (size);
}
/*
 * Pack nvl with NV_ENCODE_NATIVE and KM_SLEEP.
 *
 * Returns allocated buffer of size *sizep. Caller must free the buffer with
 * fnvlist_pack_free().  nvlist_pack() is asserted (VERIFY3U) to return 0.
 */
char *
fnvlist_pack(nvlist_t *nvl, size_t *sizep)
{
/* Start from a null buffer pointer; nvlist_pack() fills it in. */
char *packed = 0;
VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE,
KM_SLEEP), ==, 0);
return (packed);
}
/*
 * Free a buffer returned by fnvlist_pack().  Kernel builds hand the buffer
 * and its size to kmem_free(); other builds call free() and do not use
 * size, hence the ARGSUSED annotation.
 */
/*ARGSUSED*/
void
fnvlist_pack_free(char *pack, size_t size)
{
#ifdef _KERNEL
kmem_free(pack, size);
#else
free(pack);
#endif
}
/*
 * Unpack the buflen-byte buffer buf into a new nvlist (KM_SLEEP) and return
 * it.  nvlist_unpack() is asserted (VERIFY3U) to return 0.
 */
nvlist_t *
fnvlist_unpack(char *buf, size_t buflen)
{
nvlist_t *rv;
VERIFY3U(nvlist_unpack(buf, buflen, &rv, KM_SLEEP), ==, 0);
return (rv);
}
/*
 * Return a duplicate of nvl made by nvlist_dup() with KM_SLEEP; the call is
 * asserted (VERIFY3U) to return 0.
 */
nvlist_t *
fnvlist_dup(nvlist_t *nvl)
{
nvlist_t *rv;
VERIFY3U(nvlist_dup(nvl, &rv, KM_SLEEP), ==, 0);
return (rv);
}
/*
 * Merge src into dst via nvlist_merge() with KM_SLEEP; the call is asserted
 * (VERIFY3U) to return 0.
 */
void
fnvlist_merge(nvlist_t *dst, nvlist_t *src)
{
VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0);
}
/* Add a valueless boolean pair called name to nvl; asserts the add returned 0. */
void
fnvlist_add_boolean(nvlist_t *nvl, const char *name)
{
VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0);
}
/* Add a boolean_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
{
VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0);
}
/* Add a uchar_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
{
VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0);
}
/* Add an int8_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
{
VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0);
}
/* Add a uint8_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
{
VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0);
}
/* Add an int16_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
{
VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0);
}
/* Add a uint16_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
{
VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0);
}
/* Add an int32_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
{
VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0);
}
/* Add a uint32_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
{
VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0);
}
/* Add an int64_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
{
VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0);
}
/* Add a uint64_t value val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
{
VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0);
}
/* Add the string val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
{
VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0);
}
/* Add the nvlist val under name to nvl; asserts the add returned 0. */
void
fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
{
VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0);
}
/* Add the existing pair to nvl via nvlist_add_nvpair(); asserts it returned 0. */
void
fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair)
{
VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0);
}
/* Add the n-element boolean_t array val under name; asserts the add returned 0. */
void
fnvlist_add_boolean_array(nvlist_t *nvl, const char *name,
boolean_t *val, uint_t n)
{
VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element uchar_t array val under name; asserts the add returned 0. */
void
fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n)
{
VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element int8_t array val under name; asserts the add returned 0. */
void
fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n)
{
VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element uint8_t array val under name; asserts the add returned 0. */
void
fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n)
{
VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element int16_t array val under name; asserts the add returned 0. */
void
fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n)
{
VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element uint16_t array val under name; asserts the add returned 0. */
void
fnvlist_add_uint16_array(nvlist_t *nvl, const char *name,
uint16_t *val, uint_t n)
{
VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element int32_t array val under name; asserts the add returned 0. */
void
fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n)
{
VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element uint32_t array val under name; asserts the add returned 0. */
void
fnvlist_add_uint32_array(nvlist_t *nvl, const char *name,
uint32_t *val, uint_t n)
{
VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element int64_t array val under name; asserts the add returned 0. */
void
fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n)
{
VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element uint64_t array val under name; asserts the add returned 0. */
void
fnvlist_add_uint64_array(nvlist_t *nvl, const char *name,
uint64_t *val, uint_t n)
{
VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element string array val under name; asserts the add returned 0. */
void
fnvlist_add_string_array(nvlist_t *nvl, const char *name,
char * const *val, uint_t n)
{
VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0);
}
/* Add the n-element nvlist array val under name; asserts the add returned 0. */
void
fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name,
nvlist_t **val, uint_t n)
{
VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0);
}
/*
 * Remove name from nvl.  Note this wraps nvlist_remove_all(), and asserts
 * (VERIFY3U) that it returned 0.
 */
void
fnvlist_remove(nvlist_t *nvl, const char *name)
{
VERIFY3U(nvlist_remove_all(nvl, name), ==, 0);
}
/* Remove the given pair from nvl; asserts nvlist_remove_nvpair() returned 0. */
void
fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair)
{
VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0);
}
/*
 * Return the pair called name in nvl.  The lookup is asserted (VERIFY3U)
 * to return 0, so callers must only ask for names they know are present.
 */
nvpair_t *
fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name)
{
nvpair_t *rv;
VERIFY3U(nvlist_lookup_nvpair(nvl, name, &rv), ==, 0);
return (rv);
}
/*
 * Presence test for a boolean pair: B_TRUE when nvlist_lookup_boolean()
 * returns 0 for name, B_FALSE for any other return.  Unlike the other
 * lookup wrappers in this file, a miss is reported rather than asserted.
 */
boolean_t
fnvlist_lookup_boolean(nvlist_t *nvl, const char *name)
{
	if (nvlist_lookup_boolean(nvl, name) != 0)
		return (B_FALSE);
	return (B_TRUE);
}
/* Return the boolean_t stored under name; the lookup is asserted to return 0. */
boolean_t
fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name)
{
boolean_t rv;
VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the uchar_t stored under name; the lookup is asserted to return 0. */
uchar_t
fnvlist_lookup_byte(nvlist_t *nvl, const char *name)
{
uchar_t rv;
VERIFY3U(nvlist_lookup_byte(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the int8_t stored under name; the lookup is asserted to return 0. */
int8_t
fnvlist_lookup_int8(nvlist_t *nvl, const char *name)
{
int8_t rv;
VERIFY3U(nvlist_lookup_int8(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the int16_t stored under name; the lookup is asserted to return 0. */
int16_t
fnvlist_lookup_int16(nvlist_t *nvl, const char *name)
{
int16_t rv;
VERIFY3U(nvlist_lookup_int16(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the int32_t stored under name; the lookup is asserted to return 0. */
int32_t
fnvlist_lookup_int32(nvlist_t *nvl, const char *name)
{
int32_t rv;
VERIFY3U(nvlist_lookup_int32(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the int64_t stored under name; the lookup is asserted to return 0. */
int64_t
fnvlist_lookup_int64(nvlist_t *nvl, const char *name)
{
int64_t rv;
VERIFY3U(nvlist_lookup_int64(nvl, name, &rv), ==, 0);
return (rv);
}
/*
 * Return the uint8_t stored under name; the lookup is asserted to return 0.
 * Note the function name carries a "_t" suffix, unlike its siblings.
 */
uint8_t
fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name)
{
uint8_t rv;
VERIFY3U(nvlist_lookup_uint8(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the uint16_t stored under name; the lookup is asserted to return 0. */
uint16_t
fnvlist_lookup_uint16(nvlist_t *nvl, const char *name)
{
uint16_t rv;
VERIFY3U(nvlist_lookup_uint16(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the uint32_t stored under name; the lookup is asserted to return 0. */
uint32_t
fnvlist_lookup_uint32(nvlist_t *nvl, const char *name)
{
uint32_t rv;
VERIFY3U(nvlist_lookup_uint32(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return the uint64_t stored under name; the lookup is asserted to return 0. */
uint64_t
fnvlist_lookup_uint64(nvlist_t *nvl, const char *name)
{
uint64_t rv;
VERIFY3U(nvlist_lookup_uint64(nvl, name, &rv), ==, 0);
return (rv);
}
/*
 * Return the string pointer nvlist_lookup_string() yields for name; the
 * lookup is asserted to return 0.
 */
char *
fnvlist_lookup_string(nvlist_t *nvl, const char *name)
{
char *rv;
VERIFY3U(nvlist_lookup_string(nvl, name, &rv), ==, 0);
return (rv);
}
/*
 * Return the nvlist pointer nvlist_lookup_nvlist() yields for name; the
 * lookup is asserted to return 0.
 */
nvlist_t *
fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name)
{
nvlist_t *rv;
VERIFY3U(nvlist_lookup_nvlist(nvl, name, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as boolean_t; the accessor is asserted to return 0. */
boolean_t
fnvpair_value_boolean_value(nvpair_t *nvp)
{
boolean_t rv;
VERIFY3U(nvpair_value_boolean_value(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as uchar_t; the accessor is asserted to return 0. */
uchar_t
fnvpair_value_byte(nvpair_t *nvp)
{
uchar_t rv;
VERIFY3U(nvpair_value_byte(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as int8_t; the accessor is asserted to return 0. */
int8_t
fnvpair_value_int8(nvpair_t *nvp)
{
int8_t rv;
VERIFY3U(nvpair_value_int8(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as int16_t; the accessor is asserted to return 0. */
int16_t
fnvpair_value_int16(nvpair_t *nvp)
{
int16_t rv;
VERIFY3U(nvpair_value_int16(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as int32_t; the accessor is asserted to return 0. */
int32_t
fnvpair_value_int32(nvpair_t *nvp)
{
int32_t rv;
VERIFY3U(nvpair_value_int32(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as int64_t; the accessor is asserted to return 0. */
int64_t
fnvpair_value_int64(nvpair_t *nvp)
{
int64_t rv;
VERIFY3U(nvpair_value_int64(nvp, &rv), ==, 0);
return (rv);
}
/*
 * Return nvp's value as uint8_t; the accessor is asserted to return 0.
 * Note the function name carries a "_t" suffix, unlike its siblings.
 */
uint8_t
fnvpair_value_uint8_t(nvpair_t *nvp)
{
uint8_t rv;
VERIFY3U(nvpair_value_uint8(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as uint16_t; the accessor is asserted to return 0. */
uint16_t
fnvpair_value_uint16(nvpair_t *nvp)
{
uint16_t rv;
VERIFY3U(nvpair_value_uint16(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as uint32_t; the accessor is asserted to return 0. */
uint32_t
fnvpair_value_uint32(nvpair_t *nvp)
{
uint32_t rv;
VERIFY3U(nvpair_value_uint32(nvp, &rv), ==, 0);
return (rv);
}
/* Return nvp's value as uint64_t; the accessor is asserted to return 0. */
uint64_t
fnvpair_value_uint64(nvpair_t *nvp)
{
uint64_t rv;
VERIFY3U(nvpair_value_uint64(nvp, &rv), ==, 0);
return (rv);
}
/*
 * Return the string pointer nvpair_value_string() yields for nvp; the
 * accessor is asserted to return 0.
 */
char *
fnvpair_value_string(nvpair_t *nvp)
{
char *rv;
VERIFY3U(nvpair_value_string(nvp, &rv), ==, 0);
return (rv);
}
/*
 * Return the nvlist pointer nvpair_value_nvlist() yields for nvp; the
 * accessor is asserted to return 0.
 */
nvlist_t *
fnvpair_value_nvlist(nvpair_t *nvp)
{
nvlist_t *rv;
VERIFY3U(nvpair_value_nvlist(nvp, &rv), ==, 0);
return (rv);
}

View File

@ -0,0 +1,155 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifdef _KERNEL
#include <sys/systm.h>
#else
#include <errno.h>
#include <string.h>
#endif
#include <sys/debug.h>
#include <sys/fs/zfs.h>
#include <sys/types.h>
#include "zfeature_common.h"
/*
 * Set to disable all feature checks while opening pools, allowing pools with
 * unsupported features to be opened. Set for testing only.
 * When set, zfeature_is_supported() returns B_TRUE for every guid.
 */
boolean_t zfeature_checks_disable = B_FALSE;
/*
 * One descriptor per feature id (SPA_FEATURES entries).  Slots are filled in
 * by zfeature_register() and searched by the zfeature_lookup_*() functions.
 */
zfeature_info_t spa_feature_table[SPA_FEATURES];
/*
 * Decide whether c may appear in a feature guid.  Lowercase letters and
 * digits are always allowed.  The only punctuation allowed is '.' in the
 * reverse-DNS part (before the colon) and '_' in the short name (after the
 * colon).  The set is mainly aesthetic and could be widened later.
 *
 * Returns nonzero if c is acceptable, 0 otherwise.
 */
static int
valid_char(char c, boolean_t after_colon)
{
	char punct = after_colon ? '_' : '.';

	if (c >= 'a' && c <= 'z')
		return (1);
	if (c >= '0' && c <= '9')
		return (1);
	return (c == punct);
}
/*
 * Validate a feature guid of the form "com.company:feature_name".
 *
 * The string must contain exactly one colon, separating the reverse-DNS
 * organization name from the feature's short name, and every other
 * character must satisfy valid_char() for the side of the colon it is on.
 *
 * Returns B_TRUE for a well-formed guid, B_FALSE otherwise.
 */
boolean_t
zfeature_is_valid_guid(const char *name)
{
	boolean_t seen_colon = B_FALSE;
	const char *cp;

	for (cp = name; *cp != '\0'; cp++) {
		if (*cp != ':') {
			if (!valid_char(*cp, seen_colon))
				return (B_FALSE);
		} else if (seen_colon) {
			/* a second colon is never allowed */
			return (B_FALSE);
		} else {
			seen_colon = B_TRUE;
		}
	}

	return (seen_colon);
}
/*
 * Report whether the feature identified by guid is known to this software.
 * Every guid is treated as supported while zfeature_checks_disable is set;
 * otherwise the guid must be found by zfeature_lookup_guid().
 */
boolean_t
zfeature_is_supported(const char *guid)
{
	boolean_t supported = B_TRUE;

	if (!zfeature_checks_disable)
		supported = (zfeature_lookup_guid(guid, NULL) == 0);

	return (supported);
}
/*
 * Find the spa_feature_table entry whose on-disk guid equals guid.
 *
 * On a match, stores the entry in *res when res is non-NULL and returns 0.
 * Returns ENOENT when no entry matches.
 */
int
zfeature_lookup_guid(const char *guid, zfeature_info_t **res)
{
	zfeature_info_t *fi = spa_feature_table;
	zfeature_info_t *end = spa_feature_table + SPA_FEATURES;

	for (; fi != end; fi++) {
		if (strcmp(guid, fi->fi_guid) != 0)
			continue;
		if (res != NULL)
			*res = fi;
		return (0);
	}

	return (ENOENT);
}
/*
 * Find the spa_feature_table entry whose user-facing name equals name.
 *
 * On a match, stores the entry in *res when res is non-NULL and returns 0.
 * Returns ENOENT when no entry matches.
 */
int
zfeature_lookup_name(const char *name, zfeature_info_t **res)
{
	zfeature_info_t *fi = spa_feature_table;
	zfeature_info_t *end = spa_feature_table + SPA_FEATURES;

	for (; fi != end; fi++) {
		if (strcmp(name, fi->fi_uname) != 0)
			continue;
		if (res != NULL)
			*res = fi;
		return (0);
	}

	return (ENOENT);
}
/*
 * Populate the spa_feature_table slot for feature id fid.
 *
 * guid is the on-disk identifier, name the user-facing name and desc the
 * description.  readonly and mos are stored as fi_can_readonly and fi_mos;
 * asserting both at once is rejected.  deps is a NULL-terminated array of
 * prerequisite features; passing NULL records an empty dependency list.
 */
static void
zfeature_register(int fid, const char *guid, const char *name, const char *desc,
    boolean_t readonly, boolean_t mos, zfeature_info_t **deps)
{
	static zfeature_info_t *nodeps[] = { NULL };
	zfeature_info_t *slot;

	ASSERT(name != NULL);
	ASSERT(desc != NULL);
	ASSERT(!readonly || !mos);
	ASSERT3U(fid, <, SPA_FEATURES);
	ASSERT(zfeature_is_valid_guid(guid));

	slot = &spa_feature_table[fid];
	slot->fi_uname = name;
	slot->fi_guid = guid;
	slot->fi_desc = desc;
	slot->fi_mos = mos;
	slot->fi_can_readonly = readonly;
	slot->fi_depends = (deps != NULL) ? deps : nodeps;
}
/*
 * Register the features this build knows about.  At present that is only
 * async_destroy, registered with readonly = B_TRUE, mos = B_FALSE and no
 * dependencies.
 */
void
zpool_feature_init(void)
{
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
"com.delphix:async_destroy", "async_destroy",
"Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
}

View File

@ -0,0 +1,70 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _ZFEATURE_COMMON_H
#define _ZFEATURE_COMMON_H
#include <sys/fs/zfs.h>
#include <sys/types.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Description of a single pool feature.  The forward declaration lets the
 * structure refer to itself in fi_depends.
 */
struct zfeature_info;
typedef struct zfeature_info {
const char *fi_uname; /* User-facing feature name */
const char *fi_guid; /* On-disk feature identifier */
const char *fi_desc; /* Feature description */
boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
struct zfeature_info **fi_depends; /* array; null terminated */
} zfeature_info_t;
/* Function type taking a feature descriptor and an opaque argument. */
typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
#define ZFS_FEATURE_DEBUG
/*
 * Feature identifiers, used as indices into spa_feature_table.
 * SPA_FEATURES is the number of features and must stay last.
 *
 * This is a typedef: declaring it as "static enum ... spa_feature_t" would
 * instead define an unused static variable in every file that includes
 * this header.
 */
typedef enum spa_feature {
	SPA_FEATURE_ASYNC_DESTROY,
	SPA_FEATURES
} spa_feature_t;
/* Feature descriptors, indexed by feature id. */
extern zfeature_info_t spa_feature_table[SPA_FEATURES];
/* B_TRUE if the string is a well-formed "org:name" feature guid. */
extern boolean_t zfeature_is_valid_guid(const char *);
/* B_TRUE if the guid names a registered feature (or checks are disabled). */
extern boolean_t zfeature_is_supported(const char *);
/* Look up by on-disk guid; 0 on success (entry in *res), ENOENT otherwise. */
extern int zfeature_lookup_guid(const char *, zfeature_info_t **res);
/* Look up by user-facing name; 0 on success (entry in *res), ENOENT otherwise. */
extern int zfeature_lookup_name(const char *, zfeature_info_t **res);
/* Fill in spa_feature_table with the known features. */
extern void zpool_feature_init(void);
#ifdef __cplusplus
}
#endif
#endif /* _ZFEATURE_COMMON_H */

View File

@ -79,6 +79,8 @@ zpool_prop_init(void)
ZFS_TYPE_POOL, "<size>", "SIZE");
zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "FREE");
zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY,
ZFS_TYPE_POOL, "<size>", "FREEING");
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0,
@ -166,6 +168,26 @@ zpool_prop_default_numeric(zpool_prop_t prop)
return (zpool_prop_table[prop].pd_numdefault);
}
/*
 * Returns true if name begins with "feature@".  Only the prefix is
 * examined; whatever follows it is not validated here.
 */
boolean_t
zpool_prop_feature(const char *name)
{
	static const char prefix[] = "feature@";

	/* sizeof counts the terminating NUL, so drop it from the length */
	return (strncmp(name, prefix, sizeof (prefix) - 1) == 0);
}
/*
 * Returns true if name begins with "unsupported@".  Only the prefix is
 * examined; whatever follows it is not validated here.
 */
boolean_t
zpool_prop_unsupported(const char *name)
{
	static const char prefix[] = "unsupported@";

	/* sizeof counts the terminating NUL, so drop it from the length */
	return (strncmp(name, prefix, sizeof (prefix) - 1) == 0);
}
int
zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
uint64_t *index)

View File

@ -21,6 +21,7 @@
#
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
# Copyright (c) 2012 by Delphix. All rights reserved.
#
#
# This Makefile defines all file modules for the directory uts/common
@ -31,6 +32,7 @@ ZFS_COMMON_OBJS += \
arc.o \
bplist.o \
bpobj.o \
bptree.o \
dbuf.o \
ddt.o \
ddt_zap.o \
@ -52,6 +54,7 @@ ZFS_COMMON_OBJS += \
dsl_deleg.o \
dsl_prop.o \
dsl_scan.o \
zfeature.o \
gzip.o \
lzjb.o \
metaslab.o \
@ -94,11 +97,12 @@ ZFS_COMMON_OBJS += \
zrlock.o
ZFS_SHARED_OBJS += \
zfs_namecheck.o \
zfs_deleg.o \
zfs_prop.o \
zfeature_common.o \
zfs_comutil.o \
zfs_deleg.o \
zfs_fletcher.o \
zfs_namecheck.o \
zfs_prop.o \
zpool_prop.o \
zprop_common.o

View File

@ -2794,9 +2794,11 @@ arc_read_done(zio_t *zio)
callback_list = hdr->b_acb;
ASSERT(callback_list != NULL);
if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
dmu_object_byteswap_t bswap =
DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ?
byteswap_uint64_array :
dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap;
dmu_ot_byteswap[bswap].ob_func;
func(buf->b_data, hdr->b_size);
}

View File

@ -0,0 +1,224 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/arc.h>
#include <sys/bptree.h>
#include <sys/dmu.h>
#include <sys/dmu_objset.h>
#include <sys/dmu_tx.h>
#include <sys/dmu_traverse.h>
#include <sys/dsl_dataset.h>
#include <sys/dsl_dir.h>
#include <sys/dsl_pool.h>
#include <sys/dnode.h>
#include <sys/refcount.h>
#include <sys/spa.h>
/*
* A bptree is a queue of root block pointers from destroyed datasets. When a
* dataset is destroyed its root block pointer is put on the end of the pool's
* bptree queue so the dataset's blocks can be freed asynchronously by
* dsl_scan_sync. This allows the delete operation to finish without traversing
* all the dataset's blocks.
*
* Note that while bt_begin and bt_end are only ever incremented in this code
* they are effectively reset to 0 every time the entire bptree is freed because
* the bptree's object is destroyed and re-created.
*/
/*
 * State handed to bptree_visit_cb() for each block pointer visited during
 * bptree_iterate().
 *
 * Declared as a typedef: without the keyword, the trailing "bptree_args_t"
 * would define an unused global object instead of naming the type.
 */
typedef struct bptree_args {
	bptree_phys_t *ba_phys;	/* data in bonus buffer, dirtied if freeing */
	boolean_t ba_free;	/* true if freeing during traversal */

	bptree_itor_t *ba_func;	/* function to call for each blockpointer */
	void *ba_arg;		/* caller supplied argument to ba_func */
	dmu_tx_t *ba_tx;	/* caller supplied tx, NULL if not freeing */
} bptree_args_t;
/*
 * Create a new, empty bptree object in os as part of tx and return its
 * object number.  The object is allocated with a bonus buffer sized for a
 * bptree_phys_t, whose counters are all set to zero.
 */
uint64_t
bptree_alloc(objset_t *os, dmu_tx_t *tx)
{
uint64_t obj;
dmu_buf_t *db;
bptree_phys_t *bt;
obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
sizeof (bptree_phys_t), tx);
/*
* Bonus buffer contents are already initialized to 0, but for
* readability we make it explicit.
*/
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
/* Mark the bonus buffer dirty before writing the header fields. */
dmu_buf_will_dirty(db, tx);
bt = db->db_data;
bt->bt_begin = 0;
bt->bt_end = 0;
bt->bt_bytes = 0;
bt->bt_comp = 0;
bt->bt_uncomp = 0;
dmu_buf_rele(db, FTAG);
return (obj);
}
/*
 * Destroy the bptree object obj in os as part of tx.  The bonus buffer is
 * held only to assert that the tree is empty (bt_begin == bt_end) and that
 * its space counters are zero.  Returns the result of dmu_object_free().
 */
int
bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
{
dmu_buf_t *db;
bptree_phys_t *bt;
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
bt = db->db_data;
ASSERT3U(bt->bt_begin, ==, bt->bt_end);
ASSERT3U(bt->bt_bytes, ==, 0);
ASSERT3U(bt->bt_comp, ==, 0);
ASSERT3U(bt->bt_uncomp, ==, 0);
dmu_buf_rele(db, FTAG);
return (dmu_object_free(os, obj, tx));
}
/*
 * Append a new entry to the bptree object obj.
 *
 * The entry records bp and birth_txg with a zeroed bookmark and is written
 * at index bt_end.  The header's bt_end is then incremented and bytes, comp
 * and uncomp are added to its running totals.  Must be called in syncing
 * context (asserted).
 */
void
bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx)
{
dmu_buf_t *db;
bptree_phys_t *bt;
bptree_entry_phys_t bte;
/*
* bptree objects are in the pool mos, therefore they can only be
* modified in syncing context. Furthermore, this is only modified
* by the sync thread, so no locking is necessary.
*/
ASSERT(dmu_tx_is_syncing(tx));
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
bt = db->db_data;
bte.be_birth_txg = birth_txg;
bte.be_bp = *bp;
/* A zeroed bookmark: nothing of this entry has been traversed yet. */
bzero(&bte.be_zb, sizeof (bte.be_zb));
/* Entries are stored back to back, so index bt_end is the next free slot. */
dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx);
/* Dirty the bonus buffer before updating the header counters. */
dmu_buf_will_dirty(db, tx);
bt->bt_end++;
bt->bt_bytes += bytes;
bt->bt_comp += comp;
bt->bt_uncomp += uncomp;
dmu_buf_rele(db, FTAG);
}
/*
 * Traversal callback used by bptree_iterate().  arg is the struct
 * bptree_args for the walk.  A NULL bp is ignored.  Otherwise the caller's
 * ba_func is invoked on bp; if it succeeds and the walk is freeing, the
 * block's sizes are subtracted from the bptree header totals.
 *
 * Returns 0 for a NULL bp, otherwise whatever ba_func returned.
 */
/* ARGSUSED */
static int
bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
    const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
{
	struct bptree_args *args = arg;
	bptree_phys_t *phys;
	int rc;

	if (bp == NULL)
		return (0);

	rc = args->ba_func(args->ba_arg, bp, args->ba_tx);
	if (rc != 0 || !args->ba_free)
		return (rc);

	/* the block was freed: take it out of the accounting */
	phys = args->ba_phys;
	phys->bt_bytes -= bp_get_dsize_sync(spa, bp);
	phys->bt_comp -= BP_GET_PSIZE(bp);
	phys->bt_uncomp -= BP_GET_UCSIZE(bp);

	return (0);
}
/*
 * Walk the entries of bptree object obj from bt_begin up to bt_end, calling
 * func (with arg and tx) on every block pointer reached from each entry.
 *
 * When free is set the call must be in syncing context (asserted).  Each
 * entry that is traversed completely is removed: bt_begin is advanced and
 * its slot is freed.  If the traversal of an entry stops early, the entry is
 * written back with its updated bookmark so a later call can resume, and
 * the walk ends.
 *
 * Returns 0 on success, otherwise the error from dmu_bonus_hold(),
 * dmu_read() or traverse_dataset_destroyed().
 */
int
bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
void *arg, dmu_tx_t *tx)
{
int err;
uint64_t i;
dmu_buf_t *db;
struct bptree_args ba;
ASSERT(!free || dmu_tx_is_syncing(tx));
err = dmu_bonus_hold(os, obj, FTAG, &db);
if (err != 0)
return (err);
/* The header is only modified when freeing, so only dirty it then. */
if (free)
dmu_buf_will_dirty(db, tx);
ba.ba_phys = db->db_data;
ba.ba_free = free;
ba.ba_func = func;
ba.ba_arg = arg;
ba.ba_tx = tx;
err = 0;
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
bptree_entry_phys_t bte;
/* When freeing, bt_begin advances in step with i (see below). */
ASSERT(!free || i == ba.ba_phys->bt_begin);
err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
&bte, DMU_READ_NO_PREFETCH);
if (err != 0)
break;
err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
bptree_visit_cb, &ba);
if (free) {
ASSERT(err == 0 || err == ERESTART);
if (err != 0) {
/* save bookmark for future resume */
ASSERT3U(bte.be_zb.zb_objset, ==,
ZB_DESTROYED_OBJSET);
ASSERT3U(bte.be_zb.zb_level, ==, 0);
dmu_write(os, obj, i * sizeof (bte),
sizeof (bte), &bte, tx);
break;
} else {
/* Entry fully processed: drop it from the queue. */
ba.ba_phys->bt_begin++;
(void) dmu_free_range(os, obj,
i * sizeof (bte), sizeof (bte), tx);
}
}
}
ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end);
/* if all blocks are free there should be no used space */
if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
ASSERT3U(ba.ba_phys->bt_bytes, ==, 0);
ASSERT3U(ba.ba_phys->bt_comp, ==, 0);
ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0);
}
dmu_buf_rele(db, FTAG);
return (err);
}

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -227,7 +228,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
boolean_t is_metadata;
DB_DNODE_ENTER(db);
is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
DB_DNODE_EXIT(db);
return (is_metadata);

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -1067,11 +1068,9 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
if (spa->spa_ddt_stat_object == 0) {
spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
&spa->spa_ddt_stat_object, tx) == 0);
spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_DDT_STATS, tx);
}
while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -45,60 +46,73 @@
#endif
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
{ byteswap_uint8_array, TRUE, "unallocated" },
{ zap_byteswap, TRUE, "object directory" },
{ byteswap_uint64_array, TRUE, "object array" },
{ byteswap_uint8_array, TRUE, "packed nvlist" },
{ byteswap_uint64_array, TRUE, "packed nvlist size" },
{ byteswap_uint64_array, TRUE, "bpobj" },
{ byteswap_uint64_array, TRUE, "bpobj header" },
{ byteswap_uint64_array, TRUE, "SPA space map header" },
{ byteswap_uint64_array, TRUE, "SPA space map" },
{ byteswap_uint64_array, TRUE, "ZIL intent log" },
{ dnode_buf_byteswap, TRUE, "DMU dnode" },
{ dmu_objset_byteswap, TRUE, "DMU objset" },
{ byteswap_uint64_array, TRUE, "DSL directory" },
{ zap_byteswap, TRUE, "DSL directory child map"},
{ zap_byteswap, TRUE, "DSL dataset snap map" },
{ zap_byteswap, TRUE, "DSL props" },
{ byteswap_uint64_array, TRUE, "DSL dataset" },
{ zfs_znode_byteswap, TRUE, "ZFS znode" },
{ zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" },
{ byteswap_uint8_array, FALSE, "ZFS plain file" },
{ zap_byteswap, TRUE, "ZFS directory" },
{ zap_byteswap, TRUE, "ZFS master node" },
{ zap_byteswap, TRUE, "ZFS delete queue" },
{ byteswap_uint8_array, FALSE, "zvol object" },
{ zap_byteswap, TRUE, "zvol prop" },
{ byteswap_uint8_array, FALSE, "other uint8[]" },
{ byteswap_uint64_array, FALSE, "other uint64[]" },
{ zap_byteswap, TRUE, "other ZAP" },
{ zap_byteswap, TRUE, "persistent error log" },
{ byteswap_uint8_array, TRUE, "SPA history" },
{ byteswap_uint64_array, TRUE, "SPA history offsets" },
{ zap_byteswap, TRUE, "Pool properties" },
{ zap_byteswap, TRUE, "DSL permissions" },
{ zfs_acl_byteswap, TRUE, "ZFS ACL" },
{ byteswap_uint8_array, TRUE, "ZFS SYSACL" },
{ byteswap_uint8_array, TRUE, "FUID table" },
{ byteswap_uint64_array, TRUE, "FUID table size" },
{ zap_byteswap, TRUE, "DSL dataset next clones"},
{ zap_byteswap, TRUE, "scan work queue" },
{ zap_byteswap, TRUE, "ZFS user/group used" },
{ zap_byteswap, TRUE, "ZFS user/group quota" },
{ zap_byteswap, TRUE, "snapshot refcount tags"},
{ zap_byteswap, TRUE, "DDT ZAP algorithm" },
{ zap_byteswap, TRUE, "DDT statistics" },
{ byteswap_uint8_array, TRUE, "System attributes" },
{ zap_byteswap, TRUE, "SA master node" },
{ zap_byteswap, TRUE, "SA attr registration" },
{ zap_byteswap, TRUE, "SA attr layouts" },
{ zap_byteswap, TRUE, "scan translations" },
{ byteswap_uint8_array, FALSE, "deduplicated block" },
{ zap_byteswap, TRUE, "DSL deadlist map" },
{ byteswap_uint64_array, TRUE, "DSL deadlist map hdr" },
{ zap_byteswap, TRUE, "DSL dir clones" },
{ byteswap_uint64_array, TRUE, "bpobj subobj" },
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
{ DMU_BSWAP_UINT64, TRUE, "object array" },
{ DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
{ DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
{ DMU_BSWAP_UINT64, TRUE, "bpobj" },
{ DMU_BSWAP_UINT64, TRUE, "bpobj header" },
{ DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
{ DMU_BSWAP_UINT64, TRUE, "SPA space map" },
{ DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
{ DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
{ DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
{ DMU_BSWAP_UINT64, TRUE, "DSL directory" },
{ DMU_BSWAP_ZAP, TRUE, "DSL directory child map"},
{ DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
{ DMU_BSWAP_ZAP, TRUE, "DSL props" },
{ DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
{ DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
{ DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
{ DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
{ DMU_BSWAP_UINT8, FALSE, "zvol object" },
{ DMU_BSWAP_ZAP, TRUE, "zvol prop" },
{ DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
{ DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
{ DMU_BSWAP_ZAP, TRUE, "other ZAP" },
{ DMU_BSWAP_ZAP, TRUE, "persistent error log" },
{ DMU_BSWAP_UINT8, TRUE, "SPA history" },
{ DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
{ DMU_BSWAP_ZAP, TRUE, "Pool properties" },
{ DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
{ DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
{ DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
{ DMU_BSWAP_UINT8, TRUE, "FUID table" },
{ DMU_BSWAP_UINT64, TRUE, "FUID table size" },
{ DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"},
{ DMU_BSWAP_ZAP, TRUE, "scan work queue" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
{ DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"},
{ DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
{ DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
{ DMU_BSWAP_UINT8, TRUE, "System attributes" },
{ DMU_BSWAP_ZAP, TRUE, "SA master node" },
{ DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
{ DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
{ DMU_BSWAP_ZAP, TRUE, "scan translations" },
{ DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
{ DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
{ DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
{ DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
{ DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
};
/*
 * Byteswap dispatch table with DMU_BSWAP_NUMFUNCS entries.  Callers index it
 * with a dmu_object_byteswap_t value (for example the result of
 * DMU_OT_BYTESWAP(type)) and invoke the selected entry's ob_func on the
 * buffer and length to be swapped.  Each entry pairs a swap routine with a
 * short name string.
 *
 * NOTE(review): the entry order presumably has to match the declaration
 * order of the DMU_BSWAP_* enumerators -- confirm against the header before
 * inserting or reordering entries.
 */
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
{ byteswap_uint8_array, "uint8" },
{ byteswap_uint16_array, "uint16" },
{ byteswap_uint32_array, "uint32" },
{ byteswap_uint64_array, "uint64" },
{ zap_byteswap, "zap" },
{ dnode_buf_byteswap, "dnode" },
{ dmu_objset_byteswap, "objset" },
{ zfs_znode_byteswap, "znode" },
{ zfs_oldacl_byteswap, "oldacl" },
{ zfs_acl_byteswap, "acl" }
};
int
@ -175,7 +189,7 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
DB_DNODE_ENTER(db);
dn = DB_DNODE(db);
if (type > DMU_OT_NUMTYPES) {
if (!DMU_OT_IS_VALID(type)) {
error = EINVAL;
} else if (dn->dn_bonus != db) {
error = EINVAL;
@ -1513,7 +1527,7 @@ void
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
{
dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata ||
boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
(wp & WP_SPILL));
enum zio_checksum checksum = os->os_checksum;
enum zio_compress compress = os->os_compress;

View File

@ -20,11 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2012, Martin Matuska <mm@FreeBSD.org>. All rights reserved.
*/
@ -1117,8 +1114,8 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
void *data = NULL;
if (drro->drr_type == DMU_OT_NONE ||
drro->drr_type >= DMU_OT_NUMTYPES ||
drro->drr_bonustype >= DMU_OT_NUMTYPES ||
!DMU_OT_IS_VALID(drro->drr_type) ||
!DMU_OT_IS_VALID(drro->drr_bonustype) ||
drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
@ -1183,7 +1180,9 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
bcopy(data, db->db_data, drro->drr_bonuslen);
if (ra->byteswap) {
dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drro->drr_bonustype);
dmu_ot_byteswap[byteswap].ob_func(db->db_data,
drro->drr_bonuslen);
}
dmu_buf_rele(db, FTAG);
@ -1226,7 +1225,7 @@ restore_write(struct restorearg *ra, objset_t *os,
int err;
if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
drrw->drr_type >= DMU_OT_NUMTYPES)
!DMU_OT_IS_VALID(drrw->drr_type))
return (EINVAL);
data = restore_read(ra, drrw->drr_length);
@ -1245,8 +1244,11 @@ restore_write(struct restorearg *ra, objset_t *os,
dmu_tx_abort(tx);
return (err);
}
if (ra->byteswap)
dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
if (ra->byteswap) {
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(drrw->drr_type);
dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
}
dmu_write(os, drrw->drr_object,
drrw->drr_offset, drrw->drr_length, data, tx);
dmu_tx_commit(tx);

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -53,6 +54,7 @@ typedef struct traverse_data {
uint64_t td_objset;
blkptr_t *td_rootbp;
uint64_t td_min_txg;
zbookmark_t *td_resume;
int td_flags;
prefetch_data_t *td_pfd;
blkptr_cb_t *td_func;
@ -128,6 +130,54 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
zil_free(zilog);
}
/*
 * Verdict returned by resume_skip_check(): how a block should be treated
 * while a traversal is being resumed from a saved bookmark.
 */
typedef enum resume_skip {
/* Block (and everything below it) was already visited; skip it entirely. */
RESUME_SKIP_ALL,
/* No skipping applies; visit the block normally. */
RESUME_SKIP_NONE,
/* Resume point of a post-order traversal; visit the block, not its children. */
RESUME_SKIP_CHILDREN
} resume_skip_t;
/*
 * Decide how the block named by zb should be treated while a traversal is
 * being resumed from the bookmark in td->td_resume:
 *
 *   RESUME_SKIP_ALL      - zb lies before the resume point, so this block
 *                          does not need to be visited at all.
 *   RESUME_SKIP_CHILDREN - zb is the resume point of a post traversal
 *                          (TRAVERSE_POST): visit this block but not its
 *                          children, since they must have been visited in
 *                          a previous traversal.
 *   RESUME_SKIP_NONE     - everything else, including the case where no
 *                          resume bookmark is pending.
 *
 * Side effect: when zb matches the resume bookmark exactly, the bookmark is
 * zeroed out to indicate that the traversal has resumed.
 */
static resume_skip_t
resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
    const zbookmark_t *zb)
{
	/* Nothing to skip unless a non-zero resume bookmark is pending. */
	if (td->td_resume == NULL || ZB_IS_ZERO(td->td_resume))
		return (RESUME_SKIP_NONE);

	/* This bp and everything below it were already visited. */
	if (zbookmark_is_before(dnp, zb, td->td_resume))
		return (RESUME_SKIP_ALL);

	ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);

	/* Not the resume point itself: visit normally. */
	if (bcmp(zb, td->td_resume, sizeof (*zb)) != 0)
		return (RESUME_SKIP_NONE);

	/* Found the resume point; clear the bookmark to record that. */
	bzero(td->td_resume, sizeof (*zb));
	if (td->td_flags & TRAVERSE_POST)
		return (RESUME_SKIP_CHILDREN);

	return (RESUME_SKIP_NONE);
}
/*
 * Record zb as the point at which this traversal stopped by copying it into
 * the caller-supplied resume bookmark, td->td_resume.  A later traversal that
 * is handed the same bookmark consults it in resume_skip_check().
 *
 * Asserts that a resume bookmark was supplied and that zb names a level-0
 * block.
 */
static void
traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
{
ASSERT(td->td_resume != NULL);
ASSERT3U(zb->zb_level, ==, 0);
/* Overwrite the whole saved bookmark with the current position. */
bcopy(zb, td->td_resume, sizeof (*td->td_resume));
}
static int
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
@ -137,8 +187,20 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
arc_buf_t *buf = NULL;
prefetch_data_t *pd = td->td_pfd;
boolean_t hard = td->td_flags & TRAVERSE_HARD;
boolean_t pause = B_FALSE;
if (bp->blk_birth == 0) {
switch (resume_skip_check(td, dnp, zb)) {
case RESUME_SKIP_ALL:
return (0);
case RESUME_SKIP_CHILDREN:
goto post;
case RESUME_SKIP_NONE:
break;
default:
ASSERT(0);
}
if (BP_IS_HOLE(bp)) {
err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
td->td_arg);
return (err);
@ -164,8 +226,10 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
td->td_arg);
if (err == TRAVERSE_VISIT_NO_CHILDREN)
return (0);
if (err)
return (err);
if (err == ERESTART)
pause = B_TRUE; /* handle pausing at a common point */
if (err != 0)
goto post;
}
if (BP_GET_LEVEL(bp) > 0) {
@ -253,9 +317,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
if (buf)
(void) arc_buf_remove_ref(buf, &buf);
post:
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
td->td_arg);
if (err == ERESTART)
pause = B_TRUE;
}
if (pause && td->td_resume != NULL) {
ASSERT3U(err, ==, ERESTART);
ASSERT(!hard);
traverse_pause(td, zb);
}
return (err != 0 ? err : lasterr);
@ -353,18 +426,23 @@ traverse_prefetch_thread(void *arg)
* in syncing context).
*/
static int
traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
uint64_t txg_start, zbookmark_t *resume, int flags,
blkptr_cb_t func, void *arg)
{
traverse_data_t td;
prefetch_data_t pd = { 0 };
zbookmark_t czb;
int err;
ASSERT(ds == NULL || objset == ds->ds_object);
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
td.td_spa = spa;
td.td_objset = ds ? ds->ds_object : 0;
td.td_objset = objset;
td.td_rootbp = rootbp;
td.td_min_txg = txg_start;
td.td_resume = resume;
td.td_func = func;
td.td_arg = arg;
td.td_pfd = &pd;
@ -416,8 +494,17 @@ int
traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
blkptr_cb_t func, void *arg)
{
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
&ds->ds_phys->ds_bp, txg_start, flags, func, arg));
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
&ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
}
/*
 * Traverse the block tree rooted at blkptr without a dsl_dataset_t:
 * traverse_impl() is called with a NULL dataset and ZB_DESTROYED_OBJSET as
 * the objset number (presumably because the owning dataset has already been
 * destroyed -- confirm against callers).
 *
 * resume is handed through to traverse_impl() as the traversal's resume
 * bookmark; txg_start, flags, func and arg are passed through unchanged.
 * Returns whatever traverse_impl() returns.
 */
int
traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
uint64_t txg_start, zbookmark_t *resume, int flags,
blkptr_cb_t func, void *arg)
{
return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
blkptr, txg_start, resume, flags, func, arg));
}
/*
@ -434,8 +521,8 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
boolean_t hard = (flags & TRAVERSE_HARD);
/* visit the MOS */
err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
txg_start, flags, func, arg);
err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
txg_start, NULL, flags, func, arg);
if (err)
return (err);

View File

@ -20,9 +20,8 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
/*
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dmu.h>
@ -676,7 +675,7 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
return;
}
ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
if (dn->dn_maxblkid == 0 && !add) {
blkptr_t *bp;

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -196,7 +197,7 @@ dnode_verify(dnode_t *dn)
ASSERT(dn->dn_objset);
ASSERT(dn->dn_handle->dnh_dnode == dn);
ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
return;
@ -215,7 +216,7 @@ dnode_verify(dnode_t *dn)
ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
}
ASSERT3U(dn->dn_nlevels, <=, 30);
ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(dn->dn_type));
ASSERT3U(dn->dn_nblkptr, >=, 1);
ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
@ -281,8 +282,10 @@ dnode_byteswap(dnode_phys_t *dnp)
*/
int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
size_t len = DN_MAX_BONUSLEN - off;
ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
dmu_object_byteswap_t byteswap =
DMU_OT_BYTESWAP(dnp->dn_bonustype);
dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
}
/* Swap SPILL block if we have one */
@ -410,7 +413,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
dmu_zfetch_init(&dn->dn_zfetch, dn);
ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
mutex_enter(&os->os_lock);
list_insert_head(&os->os_dnodes, dn);
@ -499,11 +502,11 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
ASSERT(ot != DMU_OT_NONE);
ASSERT3U(ot, <, DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(ot));
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0));
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
ASSERT(dn->dn_type == DMU_OT_NONE);
ASSERT3U(dn->dn_maxblkid, ==, 0);
@ -571,7 +574,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
(bonustype == DMU_OT_SA && bonuslen == 0));
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(bonustype));
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
/* clean up any unreferenced dbufs */

View File

@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -597,7 +599,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
}
if (dn->dn_next_bonustype[txgoff]) {
ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
dn->dn_next_bonustype[txgoff] = 0;
}

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
@ -38,6 +38,7 @@
#include <sys/arc.h>
#include <sys/zio.h>
#include <sys/zap.h>
#include <sys/zfeature.h>
#include <sys/unique.h>
#include <sys/zfs_context.h>
#include <sys/zfs_ioctl.h>
@ -103,7 +104,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
if (BP_IS_HOLE(bp))
return;
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
if (ds == NULL) {
/*
* Account for the meta-objset space in its placeholder
@ -120,7 +121,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
mutex_enter(&ds->ds_dir->dd_lock);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
ds->ds_phys->ds_used_bytes += used;
ds->ds_phys->ds_referenced_bytes += used;
ds->ds_phys->ds_compressed_bytes += compressed;
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
ds->ds_phys->ds_unique_bytes += used;
@ -214,8 +215,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
}
}
mutex_enter(&ds->ds_lock);
ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
ds->ds_phys->ds_used_bytes -= used;
ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
ds->ds_phys->ds_referenced_bytes -= used;
ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
ds->ds_phys->ds_compressed_bytes -= compressed;
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
@ -827,8 +828,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsphys->ds_prev_snap_obj = origin->ds_object;
dsphys->ds_prev_snap_txg =
origin->ds_phys->ds_creation_txg;
dsphys->ds_used_bytes =
origin->ds_phys->ds_used_bytes;
dsphys->ds_referenced_bytes =
origin->ds_phys->ds_referenced_bytes;
dsphys->ds_compressed_bytes =
origin->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes =
@ -981,7 +982,6 @@ dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
pair = nvlist_next_nvpair(snaps, pair)) {
dsl_dataset_t *ds;
int err;
err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
if (err == 0) {
@ -1130,19 +1130,23 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
goto out;
/*
* remove the objects in open context, so that we won't
* have too much to do in syncing context.
* If async destruction is not enabled try to remove all objects
* while in the open context so that there is less work to do in
* the syncing context.
*/
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
ds->ds_phys->ds_prev_snap_txg)) {
/*
* Ignore errors, if there is not enough disk space
* we will deal with it in dsl_dataset_destroy_sync().
*/
(void) dmu_free_object(os, obj);
if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
ds->ds_phys->ds_prev_snap_txg)) {
/*
* Ignore errors, if there is not enough disk space
* we will deal with it in dsl_dataset_destroy_sync().
*/
(void) dmu_free_object(os, obj);
}
if (err != ESRCH)
goto out;
}
if (err != ESRCH)
goto out;
/*
* Only the ZIL knows how to free log blocks.
@ -1288,7 +1292,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT(!dsl_dataset_is_snapshot(ds));
if (ds->ds_phys->ds_prev_snap_obj != 0)
mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
else
mrs_used = 0;
@ -1296,7 +1300,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
ASSERT3U(dlused, <=, mrs_used);
ds->ds_phys->ds_unique_bytes =
ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
SPA_VERSION_UNIQUE_ACCURATE)
@ -1655,6 +1659,30 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
ds_next->ds_phys->ds_deadlist_obj);
}
/*
 * Synchronous destroy path: walk the dataset post-order and hand every
 * block pointer born after the previous snapshot to kill_blkptr().
 *
 * Returns the result of traverse_dataset(), which is asserted to be 0.
 */
static int
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
{
	struct killarg ka;
	uint64_t prev_snap_txg = ds->ds_phys->ds_prev_snap_txg;
	int err;

	/* Context handed to kill_blkptr() for every visited block. */
	ka.ds = ds;
	ka.tx = tx;

	/*
	 * Free everything that we point to (that's born after the previous
	 * snapshot, if we are a clone).
	 *
	 * NB: this should be very quick, because we already freed all the
	 * objects in open context.
	 */
	err = traverse_dataset(ds, prev_snap_txg, TRAVERSE_POST,
	    kill_blkptr, &ka);
	ASSERT3U(err, ==, 0);

	/* Once everything is freed no unique space may remain. */
	ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);

	return (err);
}
void
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
{
@ -1801,7 +1829,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
tx);
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
DD_USED_HEAD, used, comp, uncomp, tx);
dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
/* Merge our deadlist into next's and free it. */
dsl_deadlist_merge(&ds_next->ds_deadlist,
@ -1877,32 +1904,54 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
}
dsl_dataset_rele(ds_next, FTAG);
} else {
zfeature_info_t *async_destroy =
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
/*
* There's no next snapshot, so this is a head dataset.
* Destroy the deadlist. Unless it's a clone, the
* deadlist should be empty. (If it's a clone, it's
* safe to ignore the deadlist contents.)
*/
struct killarg ka;
dsl_deadlist_close(&ds->ds_deadlist);
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
ds->ds_phys->ds_deadlist_obj = 0;
/*
* Free everything that we point to (that's born after
* the previous snapshot, if we are a clone)
*
* NB: this should be very quick, because we already
* freed all the objects in open context.
*/
ka.ds = ds;
ka.tx = tx;
err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
TRAVERSE_POST, kill_blkptr, &ka);
ASSERT3U(err, ==, 0);
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == 0);
if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
err = old_synchronous_dataset_destroy(ds, tx);
} else {
/*
* Move the bptree into the pool's list of trees to
* clean up and update space accounting information.
*/
uint64_t used, comp, uncomp;
ASSERT(err == 0 || err == EBUSY);
if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
spa_feature_incr(dp->dp_spa, async_destroy, tx);
dp->dp_bptree_obj = bptree_alloc(
dp->dp_meta_objset, tx);
VERIFY(zap_add(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
&dp->dp_bptree_obj, tx) == 0);
}
used = ds->ds_dir->dd_phys->dd_used_bytes;
comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
ds->ds_phys->ds_unique_bytes == used);
bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
used, comp, uncomp, tx);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-used, -comp, -uncomp, tx);
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
used, comp, uncomp, tx);
}
if (ds->ds_prev != NULL) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
@ -2095,7 +2144,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsphys->ds_creation_time = gethrestime_sec();
dsphys->ds_creation_txg = crtxg;
dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
dsphys->ds_flags = ds->ds_phys->ds_flags;
@ -2219,10 +2268,22 @@ get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
zap_cursor_advance(&zc)) {
dsl_dataset_t *clone;
char buf[ZFS_MAXNAMELEN];
/*
* Even though we hold the dp_config_rwlock, the dataset
* may fail to open, returning ENOENT. If there is a
* thread concurrently attempting to destroy this
* dataset, it will have the ds_rwlock held for
* RW_WRITER. Our call to dsl_dataset_hold_obj() ->
* dsl_dataset_hold_ref() will fail its
* rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
* dp_config_rwlock, and wait for the destroy progress
* and signal ds_exclusive_cv. If the destroy was
* successful, we will see that
* DSL_DATASET_IS_DESTROYED(), and return ENOENT.
*/
if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
za.za_first_integer, FTAG, &clone) != 0) {
goto fail;
}
za.za_first_integer, FTAG, &clone) != 0)
continue;
dsl_dir_name(clone->ds_dir, buf);
VERIFY(nvlist_add_boolean(val, buf) == 0);
dsl_dataset_rele(clone, FTAG);
@ -2345,7 +2406,7 @@ dsl_dataset_space(dsl_dataset_t *ds,
uint64_t *refdbytesp, uint64_t *availbytesp,
uint64_t *usedobjsp, uint64_t *availobjsp)
{
*refdbytesp = ds->ds_phys->ds_used_bytes;
*refdbytesp = ds->ds_phys->ds_referenced_bytes;
*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
@ -2688,7 +2749,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* Note however, if we stop before we reach the ORIGIN we get:
* uN + kN + kN-1 + ... + kM - uM-1
*/
pa->used = origin_ds->ds_phys->ds_used_bytes;
pa->used = origin_ds->ds_phys->ds_referenced_bytes;
pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
for (snap = list_head(&pa->shared_snaps); snap;
@ -2722,7 +2783,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
* so we need to subtract out the clone origin's used space.
*/
if (pa->origin_origin) {
pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
}
@ -3238,8 +3299,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_deadlist_space(&csa->ohds->ds_deadlist,
&odl_used, &odl_comp, &odl_uncomp);
dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
(csa->ohds->ds_phys->ds_used_bytes + odl_used);
dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
(csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
(csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
@ -3268,8 +3329,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
}
/* swap ds_*_bytes */
SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
csa->cds->ds_phys->ds_used_bytes);
SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
csa->cds->ds_phys->ds_referenced_bytes);
SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
csa->cds->ds_phys->ds_compressed_bytes);
SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
@ -3398,8 +3459,9 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
* on-disk is over quota and there are no pending changes (which
* may free up space for us).
*/
if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
if (inflight > 0 ||
ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
error = ERESTART;
else
error = EDQUOT;
@ -3426,7 +3488,7 @@ dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
if (psa->psa_effective_value == 0)
return (0);
if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
psa->psa_effective_value < ds->ds_reserved)
return (ENOSPC);
@ -4180,8 +4242,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
dsl_pool_t *dp = new->ds_dir->dd_pool;
*usedp = 0;
*usedp += new->ds_phys->ds_used_bytes;
*usedp -= oldsnap->ds_phys->ds_used_bytes;
*usedp += new->ds_phys->ds_referenced_bytes;
*usedp -= oldsnap->ds_phys->ds_referenced_bytes;
*compp = 0;
*compp += new->ds_phys->ds_compressed_bytes;
@ -4197,9 +4259,13 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
dsl_dataset_t *snap;
uint64_t used, comp, uncomp;
err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
if (err != 0)
break;
if (snapobj == new->ds_object) {
snap = new;
} else {
err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
if (err != 0)
break;
}
if (snap->ds_phys->ds_prev_snap_txg ==
oldsnap->ds_phys->ds_creation_txg) {
@ -4228,7 +4294,8 @@ dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
* was not a snapshot of/before new.
*/
snapobj = snap->ds_phys->ds_prev_snap_obj;
dsl_dataset_rele(snap, FTAG);
if (snap != new)
dsl_dataset_rele(snap, FTAG);
if (snapobj == 0) {
err = EINVAL;
break;

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -171,10 +171,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
DMU_OT_NONE, 0, tx);
VERIFY(zap_update(mos, zapobj,
whokey, 8, 1, &jumpobj, tx) == 0);
jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
zapobj, whokey, tx);
}
while (permpair = nvlist_next_nvpair(perms, permpair)) {

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_pool.h>
@ -40,6 +40,8 @@
#include <sys/zfs_znode.h>
#include <sys/spa_impl.h>
#include <sys/dsl_deadlist.h>
#include <sys/bptree.h>
#include <sys/zfeature.h>
int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
@ -125,20 +127,32 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
}
int
dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
{
int err;
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
&dp->dp_meta_objset);
if (err != 0)
dsl_pool_close(dp);
else
*dpp = dp;
return (err);
}
int
dsl_pool_open(dsl_pool_t *dp)
{
int err;
dsl_dir_t *dd;
dsl_dataset_t *ds;
uint64_t obj;
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
&dp->dp_meta_objset);
if (err)
goto out;
ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset));
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
&dp->dp_root_dir_obj);
@ -154,7 +168,7 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
if (err)
goto out;
if (spa_version(spa) >= SPA_VERSION_ORIGIN) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
if (err)
goto out;
@ -171,7 +185,7 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
goto out;
}
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
&dp->dp_free_dir);
if (err)
@ -185,6 +199,15 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
dp->dp_meta_objset, obj));
}
if (spa_feature_is_active(dp->dp_spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
&dp->dp_bptree_obj);
if (err != 0)
goto out;
}
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
&dp->dp_tmp_userrefs_obj);
@ -193,15 +216,10 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
if (err)
goto out;
err = dsl_scan_init(dp, txg);
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
out:
rw_exit(&dp->dp_config_rwlock);
if (err)
dsl_pool_close(dp);
else
*dpp = dp;
return (err);
}
@ -495,7 +513,7 @@ int
dsl_pool_sync_context(dsl_pool_t *dp)
{
return (curthread == dp->dp_tx.tx_sync_thread ||
spa_get_dsl(dp->dp_spa) == NULL);
spa_is_initializing(dp->dp_spa));
}
uint64_t
@ -813,11 +831,8 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
ASSERT(dp->dp_tmp_userrefs_obj == 0);
ASSERT(dmu_tx_is_syncing(tx));
dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
DMU_OT_NONE, 0, tx);
VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
}
static int

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_scan.h>
@ -44,6 +45,7 @@
#include <sys/ddt.h>
#include <sys/sa.h>
#include <sys/sa_impl.h>
#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/zfs_vfsops.h>
#endif
@ -381,55 +383,6 @@ dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
priority, zio_flags, arc_flags, zb));
}
/*
 * Returns non-zero when every field of the bookmark (objset, object, level
 * and blkid) is zero, and zero otherwise.
 */
static boolean_t
bookmark_is_zero(const zbookmark_t *zb)
{
	if (zb->zb_objset != 0 || zb->zb_object != 0)
		return (0);

	return (zb->zb_level == 0 && zb->zb_blkid == 0);
}
/*
 * Returns B_TRUE when the block described by zb1 comes entirely before the
 * level-0 bookmark zb2 in traversal order, B_FALSE otherwise.
 *
 * dnp is the dnode for zb1->zb_object; it may be NULL, in which case the
 * answer is always B_FALSE unless zb2 is in the deadlist object.  Both
 * bookmarks must belong to the same objset and zb2 must be at level 0
 * (asserted).
 */
static boolean_t
bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
const zbookmark_t *zb2)
{
uint64_t zb1nextL0, zb2thisobj;
ASSERT(zb1->zb_objset == zb2->zb_objset);
ASSERT(zb2->zb_level == 0);
/*
* A bookmark in the deadlist is considered to be after
* everything else.
*/
if (zb2->zb_object == DMU_DEADLIST_OBJECT)
return (B_TRUE);
/* The objset_phys_t isn't before anything. */
if (dnp == NULL)
return (B_FALSE);
/*
 * zb1nextL0: (blkid + 1) scaled up by zb1's level -- presumably the id of
 * the first level-0 block past the range zb1 covers (TODO confirm).
 */
zb1nextL0 = (zb1->zb_blkid + 1) <<
((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
/*
 * zb2thisobj: zb2's object number, or, when that is 0, a value derived
 * from zb2's blkid -- presumably the first object number stored in that
 * meta-dnode block (TODO confirm).
 */
zb2thisobj = zb2->zb_object ? zb2->zb_object :
zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
/* Convert zb1's end position from blocks into an object number. */
uint64_t nextobj = zb1nextL0 *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
return (nextobj <= zb2thisobj);
}
/* Different objects: ordering is decided by object number alone. */
if (zb1->zb_object < zb2thisobj)
return (B_TRUE);
if (zb1->zb_object > zb2thisobj)
return (B_FALSE);
if (zb2->zb_object == DMU_META_DNODE_OBJECT)
return (B_FALSE);
/* Same object: zb1 is before zb2 only if it ends at or before zb2's blkid. */
return (zb1nextL0 <= zb2->zb_blkid);
}
static uint64_t
dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
{
@ -461,7 +414,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
if (scn->scn_pausing)
return (B_TRUE); /* we're already pausing */
if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
return (B_FALSE); /* we're resuming */
/* We only know how to resume from level-0 blocks. */
@ -616,13 +569,13 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
/*
* We never skip over user/group accounting objects (obj<0)
*/
if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) &&
(int64_t)zb->zb_object >= 0) {
/*
* If we already visited this bp & everything below (in
* a prior txg sync), don't bother doing it again.
*/
if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
return (B_TRUE);
/*
@ -815,22 +768,6 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
return;
if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) {
/*
* For non-user-accounting blocks, we need to read the
* new bp (from a deleted snapshot, found in
* check_existing_xlation). If we used the old bp,
* pointers inside this block from before we resumed
* would be untranslated.
*
* For user-accounting blocks, we need to read the old
* bp, because we will apply the entire space delta to
* it (original untranslated -> translations from
* deleted snap -> now).
*/
bp_toread = *bp;
}
if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
&buf) != 0)
return;
@ -1395,19 +1332,28 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
zap_cursor_fini(&zc);
}
static int
dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
static boolean_t
dsl_scan_free_should_pause(dsl_scan_t *scn)
{
dsl_scan_t *scn = arg;
uint64_t elapsed_nanosecs;
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
(elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
txg_sync_waiting(scn->scn_dp)) ||
spa_shutting_down(scn->scn_dp->dp_spa))
return (ERESTART);
spa_shutting_down(scn->scn_dp->dp_spa));
}
static int
dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_scan_t *scn = arg;
if (!scn->scn_is_bptree ||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
if (dsl_scan_free_should_pause(scn))
return (ERESTART);
}
zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
dmu_tx_get_txg(tx), bp, 0));
@ -1432,6 +1378,10 @@ dsl_scan_active(dsl_scan_t *scn)
if (scn->scn_phys.scn_state == DSS_SCANNING)
return (B_TRUE);
if (spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
return (B_TRUE);
}
if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
&used, &comp, &uncomp);
@ -1478,14 +1428,40 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* traversing it.
*/
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
scn->scn_is_bptree = B_FALSE;
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_MUSTSUCCEED);
err = bpobj_iterate(&dp->dp_free_bpobj,
dsl_scan_free_cb, scn, tx);
dsl_scan_free_block_cb, scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
if (err == 0 && spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
scn->scn_is_bptree = B_TRUE;
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
NULL, ZIO_FLAG_MUSTSUCCEED);
err = bptree_iterate(dp->dp_meta_objset,
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
scn, tx);
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
if (err != 0)
return;
/* disable async destroy feature */
spa_feature_decr(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
ASSERT(!spa_feature_is_active(spa,
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_BPTREE_OBJ, tx));
VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
dp->dp_bptree_obj, tx));
dp->dp_bptree_obj = 0;
}
if (scn->scn_visited_this_txg) {
zfs_dbgmsg("freed %llu blocks in %llums from "
"free_bpobj txg %llu",
"free_bpobj/bptree txg %llu",
(longlong_t)scn->scn_visited_this_txg,
(longlong_t)
(gethrtime() - scn->scn_sync_start_time) / MICROSEC,
@ -1600,6 +1576,8 @@ count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
for (i = 0; i < 4; i++) {
int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
if (t & DMU_OT_NEWTYPE)
t = DMU_OT_OTHER;
zfs_blkstat_t *zb = &zab->zab_type[l][t];
int equal;

View File

@ -18,9 +18,11 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Portions Copyright 2011 iXsystems, Inc
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -427,10 +429,9 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
char attr_name[8];
if (sa->sa_layout_attr_obj == 0) {
sa->sa_layout_attr_obj = zap_create(os,
DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx);
VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1,
&sa->sa_layout_attr_obj, tx) == 0);
sa->sa_layout_attr_obj = zap_create_link(os,
DMU_OT_SA_ATTR_LAYOUTS,
sa->sa_master_obj, SA_LAYOUTS, tx);
}
(void) snprintf(attr_name, sizeof (attr_name),
@ -1552,10 +1553,9 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
}
if (sa->sa_reg_attr_obj == 0) {
sa->sa_reg_attr_obj = zap_create(hdl->sa_os,
DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx);
VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj,
SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0);
sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
DMU_OT_SA_ATTR_REGISTRATION,
sa->sa_master_obj, SA_REGISTRY, tx);
}
for (i = 0; i != sa->sa_num_attrs; i++) {
if (sa->sa_attr_table[i].sa_registered)

View File

@ -60,6 +60,7 @@
#include <sys/spa_boot.h>
#include <sys/zfs_ioctl.h>
#include <sys/dsl_scan.h>
#include <sys/zfeature.h>
#include <sys/zvol.h>
#ifdef _KERNEL
@ -117,6 +118,7 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
@ -176,6 +178,7 @@ static void
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
{
vdev_t *rvd = spa->spa_root_vdev;
dsl_pool_t *pool = spa->spa_dsl_pool;
uint64_t size;
uint64_t alloc;
uint64_t space;
@ -222,6 +225,22 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
}
if (pool != NULL) {
dsl_dir_t *freedir = pool->dp_free_dir;
/*
* The $FREE directory was introduced in SPA_VERSION_DEADLISTS;
* when opening pools from before this version, freedir will be NULL.
*/
if (freedir != NULL) {
spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
freedir->dd_phys->dd_used_bytes, src);
} else {
spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
NULL, 0, src);
}
}
spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
if (spa->spa_comment != NULL) {
@ -361,25 +380,55 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
nvpair_t *elem;
int error = 0, reset_bootfs = 0;
uint64_t objnum;
boolean_t has_feature = B_FALSE;
elem = NULL;
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
zpool_prop_t prop;
char *propname, *strval;
uint64_t intval;
objset_t *os;
char *slash, *check;
propname = nvpair_name(elem);
if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
return (EINVAL);
char *strval, *slash, *check, *fname;
const char *propname = nvpair_name(elem);
zpool_prop_t prop = zpool_name_to_prop(propname);
switch (prop) {
case ZPROP_INVAL:
if (!zpool_prop_feature(propname)) {
error = EINVAL;
break;
}
/*
* Sanitize the input.
*/
if (nvpair_type(elem) != DATA_TYPE_UINT64) {
error = EINVAL;
break;
}
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
if (intval != 0) {
error = EINVAL;
break;
}
fname = strchr(propname, '@') + 1;
if (zfeature_lookup_name(fname, NULL) != 0) {
error = EINVAL;
break;
}
has_feature = B_TRUE;
break;
case ZPOOL_PROP_VERSION:
error = nvpair_value_uint64(elem, &intval);
if (!error &&
(intval < spa_version(spa) || intval > SPA_VERSION))
(intval < spa_version(spa) ||
intval > SPA_VERSION_BEFORE_FEATURES ||
has_feature))
error = EINVAL;
break;
@ -416,6 +465,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
error = nvpair_value_string(elem, &strval);
if (!error) {
objset_t *os;
uint64_t compress;
if (strval == NULL || strval[0] == '\0') {
@ -565,33 +615,58 @@ int
spa_prop_set(spa_t *spa, nvlist_t *nvp)
{
int error;
nvpair_t *elem;
nvpair_t *elem = NULL;
boolean_t need_sync = B_FALSE;
zpool_prop_t prop;
if ((error = spa_prop_validate(spa, nvp)) != 0)
return (error);
elem = NULL;
while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
if ((prop = zpool_name_to_prop(
nvpair_name(elem))) == ZPROP_INVAL)
return (EINVAL);
zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
if (prop == ZPOOL_PROP_CACHEFILE ||
prop == ZPOOL_PROP_ALTROOT ||
prop == ZPOOL_PROP_READONLY)
continue;
if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
uint64_t ver;
if (prop == ZPOOL_PROP_VERSION) {
VERIFY(nvpair_value_uint64(elem, &ver) == 0);
} else {
ASSERT(zpool_prop_feature(nvpair_name(elem)));
ver = SPA_VERSION_FEATURES;
need_sync = B_TRUE;
}
/* Save time if the version is already set. */
if (ver == spa_version(spa))
continue;
/*
* In addition to the pool directory object, we might
* create the pool properties object, the features for
* read object, the features for write object, or the
* feature descriptions object.
*/
error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
spa_sync_version, spa, &ver, 6);
if (error)
return (error);
continue;
}
need_sync = B_TRUE;
break;
}
if (need_sync)
if (need_sync) {
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
spa, nvp, 3));
else
return (0);
spa, nvp, 6));
}
return (0);
}
/*
@ -1630,7 +1705,7 @@ spa_load_verify_done(zio_t *zio)
int error = zio->io_error;
if (error) {
if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
type != DMU_OT_INTENT_LOG)
atomic_add_64(&sle->sle_meta_count, 1);
else
@ -1860,6 +1935,9 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
KM_SLEEP) == 0);
}
nvlist_free(spa->spa_load_info);
spa->spa_load_info = fnvlist_alloc();
gethrestime(&spa->spa_loaded_ts);
error = spa_load_impl(spa, pool_guid, config, state, type,
mosconfig, &ereport);
@ -1892,12 +1970,14 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
{
int error = 0;
nvlist_t *nvroot = NULL;
nvlist_t *label;
vdev_t *rvd;
uberblock_t *ub = &spa->spa_uberblock;
uint64_t children, config_cache_txg = spa->spa_config_txg;
int orig_mode = spa->spa_mode;
int parse;
uint64_t obj;
boolean_t missing_feat_write = B_FALSE;
/*
* If this is an untrusted config, access the pool in read-only mode.
@ -1977,19 +2057,78 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
/*
* Find the best uberblock.
*/
vdev_uberblock_load(NULL, rvd, ub);
vdev_uberblock_load(rvd, ub, &label);
/*
* If we weren't able to find a single valid uberblock, return failure.
*/
if (ub->ub_txg == 0)
if (ub->ub_txg == 0) {
nvlist_free(label);
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
}
/*
* If the pool is newer than the code, we can't open it.
* If the pool has an unsupported version we can't open it.
*/
if (ub->ub_version > SPA_VERSION)
if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
nvlist_free(label);
return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
}
if (ub->ub_version >= SPA_VERSION_FEATURES) {
nvlist_t *features;
/*
* If we weren't able to find what's necessary for reading the
* MOS in the label, return failure.
*/
if (label == NULL || nvlist_lookup_nvlist(label,
ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
nvlist_free(label);
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
ENXIO));
}
/*
* Update our in-core representation with the definitive values
* from the label.
*/
nvlist_free(spa->spa_label_features);
VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
}
nvlist_free(label);
/*
* Look through entries in the label nvlist's features_for_read. If
* there is a feature listed there which we don't understand then we
* cannot open a pool.
*/
if (ub->ub_version >= SPA_VERSION_FEATURES) {
nvlist_t *unsup_feat;
VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
0);
for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
NULL); nvp != NULL;
nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
if (!zfeature_is_supported(nvpair_name(nvp))) {
VERIFY(nvlist_add_string(unsup_feat,
nvpair_name(nvp), "") == 0);
}
}
if (!nvlist_empty(unsup_feat)) {
VERIFY(nvlist_add_nvlist(spa->spa_load_info,
ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
nvlist_free(unsup_feat);
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
ENOTSUP));
}
nvlist_free(unsup_feat);
}
/*
* If the vdev guid sum doesn't match the uberblock, we have an
@ -2023,7 +2162,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
spa->spa_claim_max_txg = spa->spa_first_txg;
spa->spa_prev_software_version = ub->ub_software_version;
error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
if (error)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
@ -2031,6 +2170,84 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
if (spa_version(spa) >= SPA_VERSION_FEATURES) {
boolean_t missing_feat_read = B_FALSE;
nvlist_t *unsup_feat;
if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
&spa->spa_feat_for_read_obj) != 0) {
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
}
if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
&spa->spa_feat_for_write_obj) != 0) {
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
}
if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
&spa->spa_feat_desc_obj) != 0) {
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
}
VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
0);
if (!feature_is_supported(spa->spa_meta_objset,
spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
unsup_feat))
missing_feat_read = B_TRUE;
if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
if (!feature_is_supported(spa->spa_meta_objset,
spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
unsup_feat))
missing_feat_write = B_TRUE;
}
if (!nvlist_empty(unsup_feat)) {
VERIFY(nvlist_add_nvlist(spa->spa_load_info,
ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
}
nvlist_free(unsup_feat);
if (!missing_feat_read) {
fnvlist_add_boolean(spa->spa_load_info,
ZPOOL_CONFIG_CAN_RDONLY);
}
/*
* If the state is SPA_LOAD_TRYIMPORT, our objective is
* twofold: to determine whether the pool is available for
* import in read-write mode and (if it is not) whether the
* pool is available for import in read-only mode. If the pool
* is available for import in read-write mode, it is displayed
* as available in userland; if it is not available for import
* in read-only mode, it is displayed as unavailable in
* userland. If the pool is available for import in read-only
* mode but not read-write mode, it is displayed as unavailable
* in userland with a special note that the pool is actually
* available for open in read-only mode.
*
* As a result, if the state is SPA_LOAD_TRYIMPORT and we are
* missing a feature for write, we must first determine whether
* the pool can be opened read-only before returning to
* userland in order to know whether to display the
* abovementioned note.
*/
if (missing_feat_read || (missing_feat_write &&
spa_writeable(spa))) {
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
ENOTSUP));
}
}
spa->spa_is_initializing = B_TRUE;
error = dsl_pool_open(spa->spa_dsl_pool);
spa->spa_is_initializing = B_FALSE;
if (error != 0)
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
if (!mosconfig) {
uint64_t hostid;
nvlist_t *policy = NULL, *nvconfig;
@ -2248,7 +2465,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
nvlist_free(nvconfig);
/*
* Now that we've validate the config, check the state of the
* Now that we've validated the config, check the state of the
* root vdev. If it can't be opened, it indicates one or
* more toplevel vdevs are faulted.
*/
@ -2261,6 +2478,17 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
}
}
if (missing_feat_write) {
ASSERT(state == SPA_LOAD_TRYIMPORT);
/*
* At this point, we know that we can open the pool in
* read-only mode but not read-write mode. We now have enough
* information and can return to userland.
*/
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
}
/*
* We've successfully opened the pool, verify that we're ready
* to start pushing transactions.
@ -2370,10 +2598,18 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
}
/*
* If spa_load() fails this function will try loading prior txg's. If
* 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
* will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
* function will not rewind the pool and will return the same error as
* spa_load().
*/
static int
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
uint64_t max_request, int rewind_flags)
{
nvlist_t *loadinfo = NULL;
nvlist_t *config = NULL;
int load_error, rewind_error;
uint64_t safe_rewind_txg;
@ -2402,9 +2638,18 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
return (load_error);
}
/* Price of rolling back is discarding txgs, including log */
if (state == SPA_LOAD_RECOVER)
if (state == SPA_LOAD_RECOVER) {
/* Price of rolling back is discarding txgs, including log */
spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
/*
* If we aren't rolling back save the load info from our first
* import attempt so that we can restore it after attempting
* to rewind.
*/
loadinfo = spa->spa_load_info;
spa->spa_load_info = fnvlist_alloc();
}
spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
@ -2428,7 +2673,20 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
if (config && (rewind_error || state != SPA_LOAD_RECOVER))
spa_config_set(spa, config);
return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
if (state == SPA_LOAD_RECOVER) {
ASSERT3P(loadinfo, ==, NULL);
return (rewind_error);
} else {
/* Store the rewind info as part of the initial load info */
fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO,
spa->spa_load_info);
/* Restore the initial load info */
fnvlist_free(spa->spa_load_info);
spa->spa_load_info = loadinfo;
return (load_error);
}
}
/*
@ -2707,8 +2965,50 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
}
}
static void
spa_add_feature_stats(spa_t *spa, nvlist_t *config)
{
nvlist_t *features;
zap_cursor_t zc;
zap_attribute_t za;
ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0);
if (spa->spa_feat_for_read_obj != 0) {
for (zap_cursor_init(&zc, spa->spa_meta_objset,
spa->spa_feat_for_read_obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
ASSERT(za.za_integer_length == sizeof (uint64_t) &&
za.za_num_integers == 1);
VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
za.za_first_integer));
}
zap_cursor_fini(&zc);
}
if (spa->spa_feat_for_write_obj != 0) {
for (zap_cursor_init(&zc, spa->spa_meta_objset,
spa->spa_feat_for_write_obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
ASSERT(za.za_integer_length == sizeof (uint64_t) &&
za.za_num_integers == 1);
VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
za.za_first_integer));
}
zap_cursor_fini(&zc);
}
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
features) == 0);
nvlist_free(features);
}
int
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
spa_get_stats(const char *name, nvlist_t **config,
char *altroot, size_t buflen)
{
int error;
spa_t *spa;
@ -2743,6 +3043,7 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
spa_add_spares(spa, *config);
spa_add_l2cache(spa, *config);
spa_add_feature_stats(spa, *config);
}
}
@ -2963,6 +3264,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
uint64_t version, obj;
boolean_t has_features;
/*
* If this pool already exists, return failure.
@ -2988,10 +3290,18 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
return (error);
}
if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
&version) != 0)
has_features = B_FALSE;
for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
if (zpool_prop_feature(nvpair_name(elem)))
has_features = B_TRUE;
}
if (has_features || nvlist_lookup_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
version = SPA_VERSION;
ASSERT(version <= SPA_VERSION);
}
ASSERT(SPA_VERSION_IS_SUPPORTED(version));
spa->spa_first_txg = txg;
spa->spa_uberblock.ub_txg = txg - 1;
@ -3067,8 +3377,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
spa->spa_l2cache.sav_sync = B_TRUE;
}
spa->spa_is_initializing = B_TRUE;
spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
spa->spa_meta_objset = dp->dp_meta_objset;
spa->spa_is_initializing = B_FALSE;
/*
* Create DDTs (dedup tables).
@ -3092,6 +3404,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
cmn_err(CE_PANIC, "failed to add pool config");
}
if (spa_version(spa) >= SPA_VERSION_FEATURES)
spa_feature_create_zap_objects(spa, tx);
if (zap_add(spa->spa_meta_objset,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
sizeof (uint64_t), 1, &version, tx) != 0) {
@ -3283,7 +3598,7 @@ spa_import_rootpool(char *devpath, char *devid)
}
#endif
if (config == NULL) {
cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
devpath);
return (EIO);
}
@ -3603,6 +3918,8 @@ spa_tryimport(nvlist_t *tryconfig)
state) == 0);
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
spa->spa_uberblock.ub_timestamp) == 0);
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
spa->spa_load_info) == 0);
/*
* If the bootfs property exists on this pool then we
@ -5328,7 +5645,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
* information. This avoids the dbuf_will_dirty() path and
* saves us a pre-read to get data we don't actually care about.
*/
bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
packed = kmem_alloc(bufsize, KM_SLEEP);
VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
@ -5413,6 +5730,24 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
}
/*
 * Sync task callback that sets the pool version.
 *
 * arg1 is the spa_t, arg2 points at the uint64_t version to install.  The
 * new version is stored in the in-core uberblock and the root vdev's
 * config is marked dirty.
 */
static void
spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
{
	spa_t *spa = arg1;
	const uint64_t *versionp = arg2;
	uint64_t newver = *versionp;

	/*
	 * Pool creation sets the version through its own special-cased
	 * path, so this task must never run in the initial txg.
	 */
	ASSERT(tx->tx_txg != TXG_INITIAL);

	/* Only upgrades to a version this code knows about are allowed. */
	ASSERT(newver <= SPA_VERSION);
	ASSERT(newver >= spa_version(spa));

	spa->spa_uberblock.ub_version = newver;
	vdev_config_dirty(spa->spa_root_vdev);
}
/*
* Set zpool properties.
*/
@ -5422,32 +5757,38 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
spa_t *spa = arg1;
objset_t *mos = spa->spa_meta_objset;
nvlist_t *nvp = arg2;
nvpair_t *elem;
uint64_t intval;
char *strval;
zpool_prop_t prop;
const char *propname;
zprop_type_t proptype;
nvpair_t *elem = NULL;
mutex_enter(&spa->spa_props_lock);
elem = NULL;
while ((elem = nvlist_next_nvpair(nvp, elem))) {
uint64_t intval;
char *strval, *fname;
zpool_prop_t prop;
const char *propname;
zprop_type_t proptype;
zfeature_info_t *feature;
switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
case ZPOOL_PROP_VERSION:
case ZPROP_INVAL:
/*
* Only set version for non-zpool-creation cases
* (set/import). spa_create() needs special care
* for version setting.
* We checked this earlier in spa_prop_validate().
*/
if (tx->tx_txg != TXG_INITIAL) {
VERIFY(nvpair_value_uint64(elem,
&intval) == 0);
ASSERT(intval <= SPA_VERSION);
ASSERT(intval >= spa_version(spa));
spa->spa_uberblock.ub_version = intval;
vdev_config_dirty(spa->spa_root_vdev);
}
ASSERT(zpool_prop_feature(nvpair_name(elem)));
fname = strchr(nvpair_name(elem), '@') + 1;
VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
spa_feature_enable(spa, feature, tx);
break;
case ZPOOL_PROP_VERSION:
VERIFY(nvpair_value_uint64(elem, &intval) == 0);
/*
* The version is synced separately before other
* properties and should be correct by now.
*/
ASSERT3U(spa_version(spa), >=, intval);
break;
case ZPOOL_PROP_ALTROOT:
@ -5484,14 +5825,10 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
* Set pool property values in the poolprops mos object.
*/
if (spa->spa_pool_props_object == 0) {
VERIFY((spa->spa_pool_props_object =
zap_create(mos, DMU_OT_POOL_PROPS,
DMU_OT_NONE, 0, tx)) > 0);
VERIFY(zap_update(mos,
spa->spa_pool_props_object =
zap_create_link(mos, DMU_OT_POOL_PROPS,
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
8, 1, &spa->spa_pool_props_object, tx)
== 0);
tx);
}
/* normalize the property name */
@ -5590,6 +5927,11 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
/* Keeping the freedir open increases spa_minref */
spa->spa_minref += 3;
}
if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
spa_feature_create_zap_objects(spa, tx);
}
}
/*

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -35,6 +35,7 @@
#include <sys/zfs_ioctl.h>
#include <sys/utsname.h>
#include <sys/sunddi.h>
#include <sys/zfeature.h>
#ifdef _KERNEL
#include <sys/kobj.h>
#include <sys/zone.h>
@ -407,6 +408,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
nvlist_free(nvroot);
/*
* Store what's necessary for reading the MOS in the label.
*/
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
spa->spa_label_features) == 0);
if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
ddt_histogram_t *ddh;
ddt_stat_t *dds;

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@ -48,6 +48,7 @@
#include <sys/arc.h>
#include <sys/ddt.h>
#include "zfs_prop.h"
#include "zfeature_common.h"
/*
* SPA locking
@ -216,7 +217,7 @@
* Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
*
* spa_rename() is also implemented within this file since is requires
* spa_rename() is also implemented within this file since it requires
* manipulation of the namespace.
*/
@ -487,8 +488,22 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
KM_SLEEP) == 0);
if (config != NULL)
if (config != NULL) {
nvlist_t *features;
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
&features) == 0) {
VERIFY(nvlist_dup(features, &spa->spa_label_features,
0) == 0);
}
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
}
if (spa->spa_label_features == NULL) {
VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
KM_SLEEP) == 0);
}
return (spa);
}
@ -525,6 +540,7 @@ spa_remove(spa_t *spa)
list_destroy(&spa->spa_config_list);
nvlist_free(spa->spa_label_features);
nvlist_free(spa->spa_load_info);
spa_config_set(spa, NULL);
@ -1033,6 +1049,20 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
* ==========================================================================
*/
/*
 * Record 'feature' in the pool's in-core label feature list
 * (spa_label_features) and mark the root vdev's config dirty.
 * The result of the nvlist insertion is deliberately ignored.
 */
void
spa_activate_mos_feature(spa_t *spa, const char *feature)
{
	nvlist_t *label_features = spa->spa_label_features;

	(void) nvlist_add_boolean(label_features, feature);
	vdev_config_dirty(spa->spa_root_vdev);
}
/*
 * Remove every entry named 'feature' from the pool's in-core label feature
 * list (spa_label_features) and mark the root vdev's config dirty.
 * The result of the nvlist removal is deliberately ignored.
 */
void
spa_deactivate_mos_feature(spa_t *spa, const char *feature)
{
	nvlist_t *label_features = spa->spa_label_features;

	(void) nvlist_remove_all(label_features, feature);
	vdev_config_dirty(spa->spa_root_vdev);
}
/*
* Rename a spa_t.
*/
@ -1183,12 +1213,22 @@ spa_generate_guid(spa_t *spa)
void
sprintf_blkptr(char *buf, const blkptr_t *bp)
{
char *type = NULL;
char type[256];
char *checksum = NULL;
char *compress = NULL;
if (bp != NULL) {
type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
dmu_object_byteswap_t bswap =
DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
(void) snprintf(type, sizeof (type), "bswap %s %s",
DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
"metadata" : "data",
dmu_ot_byteswap[bswap].ob_name);
} else {
(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
sizeof (type));
}
checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
}
@ -1270,6 +1310,12 @@ spa_get_dsl(spa_t *spa)
return (spa->spa_dsl_pool);
}
/*
 * Accessor for spa->spa_is_initializing.  That flag is raised around
 * dsl_pool_open() during pool load and around dsl_pool_create() during
 * pool creation.
 */
boolean_t
spa_is_initializing(spa_t *spa)
{
	boolean_t initializing = spa->spa_is_initializing;

	return (initializing);
}
blkptr_t *
spa_get_rootblkptr(spa_t *spa)
{
@ -1553,6 +1599,7 @@ spa_init(int mode)
vdev_cache_stat_init();
zfs_prop_init();
zpool_prop_init();
zpool_feature_init();
spa_config_load();
l2arc_start();
}

View File

@ -0,0 +1,64 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_BPTREE_H
#define _SYS_BPTREE_H
#include <sys/spa.h>
#include <sys/zio.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
* Per-object bookkeeping for a bptree.
* NOTE(review): the "_phys" suffix suggests this is an on-disk layout, so
* fields should not be reordered or resized -- confirm against bptree.c.
* The field notes below are inferred from the names and from the
* bptree_add() prototype; verify before relying on them.
*/
typedef struct bptree_phys {
/* presumably the index of the first entry still to process -- TODO confirm */
uint64_t bt_begin;
/* presumably the index just past the last entry added -- TODO confirm */
uint64_t bt_end;
/* presumably totals of the bytes/comp/uncomp values given to bptree_add() */
uint64_t bt_bytes;
uint64_t bt_comp;
uint64_t bt_uncomp;
} bptree_phys_t;
/*
* One bptree entry: a block pointer, the birth-txg cutoff applied when
* deleting, and a bookmark from which an interrupted traversal can resume.
* NOTE(review): the "_phys" suffix suggests this is an on-disk layout, so
* fields should not be reordered or resized -- confirm against bptree.c.
*/
typedef struct bptree_entry_phys {
blkptr_t be_bp;
uint64_t be_birth_txg; /* only delete blocks born after this txg */
zbookmark_t be_zb; /* holds traversal resume point if needed */
} bptree_entry_phys_t;
/*
* Callback type handed to bptree_iterate(): receives the caller's 'arg',
* a block pointer, and the current transaction, and returns an int status.
* dsl_scan_free_block_cb() in dsl_scan.c is one such callback; it returns
* ERESTART when it wants to pause.
*/
typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
/*
* Returns a uint64_t.
* NOTE(review): presumably the object number of a newly created bptree in
* 'os' -- confirm against bptree.c.
*/
uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx);
/*
* Releases bptree object 'obj' in 'os'.  dsl_scan_sync() requires a return
* value of 0 (VERIFY3U).
*/
int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
/*
* Adds an entry for 'bp' to bptree 'obj'.
* NOTE(review): birth_txg presumably becomes the entry's be_birth_txg, and
* bytes/comp/uncomp presumably update the bt_* totals -- TODO confirm.
*/
void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx);
/*
* Invokes 'func' with 'arg' over bptree 'obj'.  dsl_scan_sync() calls this
* with free == B_TRUE and stops for this txg when the result is nonzero.
* NOTE(review): the exact meaning of 'free' and of a nonzero return is
* defined in bptree.c, which is not visible here -- confirm there.
*/
int bptree_iterate(objset_t *os, uint64_t obj, boolean_t free,
bptree_itor_t func, void *arg, dmu_tx_t *tx);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_BPTREE_H */

View File

@ -18,11 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@ -75,6 +74,53 @@ typedef struct objset objset_t;
typedef struct dmu_tx dmu_tx_t;
typedef struct dsl_dir dsl_dir_t;
/*
* Identifies how an object's data is byteswapped.  A value of this type
* occupies the low bits (DMU_OT_BYTESWAP_MASK) of an object type built
* with DMU_OT(), and is used to index the dmu_ot_byteswap[] table.
*/
typedef enum dmu_object_byteswap {
DMU_BSWAP_UINT8,
DMU_BSWAP_UINT16,
DMU_BSWAP_UINT32,
DMU_BSWAP_UINT64,
DMU_BSWAP_ZAP,
DMU_BSWAP_DNODE,
DMU_BSWAP_OBJSET,
DMU_BSWAP_ZNODE,
DMU_BSWAP_OLDACL,
DMU_BSWAP_ACL,
/*
* Allocating a new byteswap type number makes the on-disk format
* incompatible with any other format that uses the same number.
*
* Data can usually be structured to work with one of the
* DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
*/
/* Count of byteswap types; DMU_OT_IS_VALID() uses it as the upper bound. */
DMU_BSWAP_NUMFUNCS
} dmu_object_byteswap_t;
/*
* Bit layout of an object type built with DMU_OT():
* bit 7 (0x80) marks the type as built with DMU_OT() rather than being a
* dmu_object_type_t table index, bit 6 (0x40) marks the data as metadata,
* and bits 0-5 (0x3f) hold the dmu_object_byteswap_t value.
*/
#define DMU_OT_NEWTYPE 0x80
#define DMU_OT_METADATA 0x40
#define DMU_OT_BYTESWAP_MASK 0x3f
/*
* Defines a uint8_t object type. Object types specify if the data
* in the object is metadata (boolean) and how to byteswap the data
* (dmu_object_byteswap_t).
*/
#define DMU_OT(byteswap, metadata) \
(DMU_OT_NEWTYPE | \
((metadata) ? DMU_OT_METADATA : 0) | \
((byteswap) & DMU_OT_BYTESWAP_MASK))
/*
* The three macros below accept either kind of object type.  When
* DMU_OT_NEWTYPE is set they decode the bits directly; otherwise they
* treat 'ot' as an index bounded by DMU_OT_NUMTYPES into the dmu_ot table.
* Each one evaluates 'ot' more than once, so do not pass an expression
* with side effects.
*/
/* True if 'ot' has a known byteswap value or is a known table index. */
#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
(ot) < DMU_OT_NUMTYPES)
/*
* Nonzero if 'ot' is metadata.  For DMU_OT() types the result is the raw
* DMU_OT_METADATA bit (0x40), not necessarily 1.
*/
#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
((ot) & DMU_OT_METADATA) : \
dmu_ot[(ot)].ot_metadata)
/* Yields the byteswap value for 'ot'. */
#define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
((ot) & DMU_OT_BYTESWAP_MASK) : \
dmu_ot[(ot)].ot_byteswap)
typedef enum dmu_object_type {
DMU_OT_NONE,
/* general: */
@ -139,7 +185,35 @@ typedef enum dmu_object_type {
DMU_OT_DEADLIST_HDR, /* UINT64 */
DMU_OT_DSL_CLONES, /* ZAP */
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
DMU_OT_NUMTYPES
/*
* Do not allocate new object types here. Doing so makes the on-disk
* format incompatible with any other format that uses the same object
* type number.
*
* When creating an object which does not have one of the above types
* use the DMU_OTN_* type with the correct byteswap and metadata
* values.
*
* The DMU_OTN_* types do not have entries in the dmu_ot table,
* use the DMU_OT_IS_METADATA() and DMU_OT_BYTESWAP() macros instead
* of indexing into dmu_ot directly (this works for both DMU_OT_* types
* and DMU_OTN_* types).
*/
DMU_OT_NUMTYPES,
/*
* Names for valid types declared with DMU_OT().
*/
DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
} dmu_object_type_t;
typedef enum dmu_objset_type {
@ -221,6 +295,9 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
*/
#define DMU_POOL_DIRECTORY_OBJECT 1
#define DMU_POOL_CONFIG "config"
#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write"
#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions"
#define DMU_POOL_ROOT_DATASET "root_dataset"
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
@ -236,6 +313,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_CREATION_VERSION "creation_version"
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
/*
* Allocate an object from this objset. The range of object numbers
@ -496,7 +574,7 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
/*
* Free up the data blocks for a defined range of a file. If size is
* zero, the range from offset to end-of-file is freed.
* -1, the range from offset to end-of-file is freed.
*/
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
uint64_t size, dmu_tx_t *tx);
@ -566,12 +644,18 @@ typedef struct dmu_object_info {
typedef void arc_byteswap_func_t(void *buf, size_t size);
typedef struct dmu_object_type_info {
arc_byteswap_func_t *ot_byteswap;
dmu_object_byteswap_t ot_byteswap;
boolean_t ot_metadata;
char *ot_name;
} dmu_object_type_info_t;
/*
 * Element type of the dmu_ot_byteswap[] table, which is declared with
 * DMU_BSWAP_NUMFUNCS entries.
 */
typedef struct dmu_object_byteswap_info {
/* Byteswap routine; arc_byteswap_func_t takes (void *buf, size_t size). */
arc_byteswap_func_t *ob_func;
/* Presumably a printable name for this byteswap type -- TODO confirm. */
char *ob_name;
} dmu_object_byteswap_info_t;
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
/*
* Get information on a DMU object.

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DMU_TRAVERSE_H
@ -54,6 +55,9 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
int traverse_dataset(struct dsl_dataset *ds,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
uint64_t txg_start, zbookmark_t *resume, int flags,
blkptr_cb_t func, void *arg);
int traverse_pool(spa_t *spa,
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);

View File

@ -22,7 +22,7 @@
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>.
* All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
*/
@ -88,7 +88,12 @@ typedef struct dsl_dataset_phys {
uint64_t ds_creation_time; /* seconds since 1970 */
uint64_t ds_creation_txg;
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
uint64_t ds_used_bytes;
/*
* ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
* include all blocks referenced by this dataset, including those
* shared with any other datasets.
*/
uint64_t ds_referenced_bytes;
uint64_t ds_compressed_bytes;
uint64_t ds_uncompressed_bytes;
uint64_t ds_unique_bytes; /* only relevant to snapshots */

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_POOL_H
@ -34,6 +35,7 @@
#include <sys/ddt.h>
#include <sys/arc.h>
#include <sys/bpobj.h>
#include <sys/bptree.h>
#ifdef __cplusplus
extern "C" {
@ -48,7 +50,8 @@ struct dsl_scan;
/* These macros are for indexing into the zfs_all_blkstats_t. */
#define DMU_OT_DEFERRED DMU_OT_NONE
#define DMU_OT_TOTAL DMU_OT_NUMTYPES
#define DMU_OT_OTHER DMU_OT_NUMTYPES /* place holder for DMU_OT() types */
#define DMU_OT_TOTAL (DMU_OT_NUMTYPES + 1)
typedef struct zfs_blkstat {
uint64_t zb_count;
@ -85,6 +88,7 @@ typedef struct dsl_pool {
uint64_t dp_write_limit;
uint64_t dp_tmp_userrefs_obj;
bpobj_t dp_free_bpobj;
uint64_t dp_bptree_obj;
struct dsl_scan *dp_scan;
@ -110,7 +114,8 @@ typedef struct dsl_pool {
zfs_all_blkstats_t *dp_blkstats;
} dsl_pool_t;
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
int dsl_pool_open(dsl_pool_t *dp);
void dsl_pool_close(dsl_pool_t *dp);
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_DSL_SCAN_H
@ -79,6 +80,9 @@ typedef struct dsl_scan {
uint64_t scn_sync_start_time;
zio_t *scn_zio_root;
/* for freeing blocks */
boolean_t scn_is_bptree;
/* for debugging / information */
uint64_t scn_visited_this_txg;

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@ -94,7 +94,7 @@ struct dsl_pool;
/*
* Size of block to hold the configuration data (a packed nvlist)
*/
#define SPA_CONFIG_BLOCKSIZE (1 << 14)
#define SPA_CONFIG_BLOCKSIZE (1ULL << 14)
/*
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
@ -262,7 +262,7 @@ typedef struct blkptr {
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
#define BP_GET_UCSIZE(bp) \
((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
#define BP_GET_NDVAS(bp) \
@ -403,8 +403,8 @@ typedef struct blkptr {
#include <sys/dmu.h>
#define BP_GET_BUFC_TYPE(bp) \
(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
ARC_BUFC_METADATA : ARC_BUFC_DATA);
(((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
ARC_BUFC_METADATA : ARC_BUFC_DATA)
typedef enum spa_import_type {
SPA_IMPORT_EXISTING,
@ -415,8 +415,8 @@ typedef enum spa_import_type {
extern int spa_open(const char *pool, spa_t **, void *tag);
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
nvlist_t *policy, nvlist_t **config);
extern int spa_get_stats(const char *pool, nvlist_t **config,
char *altroot, size_t buflen);
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
size_t buflen);
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
const char *history_str, nvlist_t *zplprops);
extern int spa_import_rootpool(char *devpath, char *devid);
@ -573,6 +573,7 @@ extern void spa_claim_notify(zio_t *zio);
/* Accessor functions */
extern boolean_t spa_shutting_down(spa_t *spa);
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
extern boolean_t spa_is_initializing(spa_t *spa);
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
extern void spa_altroot(spa_t *, char *, size_t);
@ -604,6 +605,8 @@ extern uint64_t spa_delegation(spa_t *spa);
extern objset_t *spa_meta_objset(spa_t *spa);
/* Miscellaneous support routines */
extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
extern int spa_rename(const char *oldname, const char *newname);
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
*/
@ -127,6 +127,7 @@ struct spa {
uint64_t spa_import_flags; /* import specific flags */
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
dsl_pool_t *spa_dsl_pool;
boolean_t spa_is_initializing; /* true while opening pool */
metaslab_class_t *spa_normal_class; /* normal data class */
metaslab_class_t *spa_log_class; /* intent log data class */
uint64_t spa_first_txg; /* first txg after spa_open() */
@ -144,6 +145,7 @@ struct spa {
list_t spa_state_dirty_list; /* vdevs with dirty state */
spa_aux_vdev_t spa_spares; /* hot spares */
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
nvlist_t *spa_label_features; /* Features for reading MOS */
uint64_t spa_config_object; /* MOS object for pool config */
uint64_t spa_config_generation; /* config generation number */
uint64_t spa_syncing_txg; /* txg currently syncing */
@ -220,7 +222,10 @@ struct spa {
boolean_t spa_autoreplace; /* autoreplace set in open */
int spa_vdev_locks; /* locks grabbed */
uint64_t spa_creation_version; /* version at pool creation */
uint64_t spa_prev_software_version;
uint64_t spa_prev_software_version; /* See ub_software_version */
uint64_t spa_feat_for_write_obj; /* required to write to pool */
uint64_t spa_feat_for_read_obj; /* required to read from pool */
uint64_t spa_feat_desc_obj; /* Feature descriptions */
/*
* spa_refcnt & spa_config_lock must be the last elements
* because refcount_t changes size based on compilation options.

View File

@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
@ -141,8 +142,8 @@ extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern int vdev_label_number(uint64_t psise, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label);
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
typedef enum {
VDEV_LABEL_CREATE, /* create/add a new device */

View File

@ -202,7 +202,7 @@ struct vdev {
* For DTrace to work in userland (libzpool) context, these fields must
* remain at the end of the structure. DTrace will use the kernel's
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
* larger in userland, the offsets for the rest fields would be
* larger in userland, the offsets for the rest of the fields would be
* incorrect.
*/
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
@ -257,6 +257,7 @@ typedef struct vdev_label {
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
#define VDEV_LABELS 4
#define VDEV_BEST_LABEL VDEV_LABELS
#define VDEV_ALLOC_LOAD 0
#define VDEV_ALLOC_ADD 1

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZAP_H
@ -132,6 +133,8 @@ uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
uint64_t parent_obj, const char *name, dmu_tx_t *tx);
/*
* Create a new zapobj with no attributes from the given (unallocated)
@ -300,12 +303,6 @@ int zap_add_int_key(objset_t *os, uint64_t obj,
int zap_lookup_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t *valuep);
/*
* They name is a stringified version of key; increment its value by
* delta. Zero values will be zap_remove()-ed.
*/
int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
dmu_tx_t *tx);
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
dmu_tx_t *tx);

View File

@ -0,0 +1,52 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_ZFEATURE_H
#define _SYS_ZFEATURE_H
#include <sys/dmu.h>
#include <sys/nvpair.h>
#include "zfeature_common.h"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Walks the feature ZAP object 'obj' in objset 'os' and returns B_TRUE only
 * if every feature recorded there with a non-zero reference count is
 * supported by this software.  When 'unsup_feat' is not NULL, each
 * unsupported feature is added to it as name -> description, where the
 * description is looked up in the ZAP object 'desc_obj' (an empty string
 * is used if that lookup fails).
 */
extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
uint64_t desc_obj, nvlist_t *unsup_feat);

struct spa;

/*
 * NOTE(review): presumably creates the feature ZAP objects in the MOS; the
 * implementation is not visible from this header -- confirm in zfeature.c.
 */
extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
/*
 * Enable a feature on the pool.  Per the design comment in zfeature.c, an
 * enabled feature has an on-disk reference count (initially 0), and enabling
 * a feature also enables the features it depends on.
 */
extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
/*
 * Increment / decrement an enabled feature's reference count.  Per the
 * design comment in zfeature.c, reference counts may only be updated in
 * syncing context.
 */
extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
/*
 * Query a feature's state.  Per the design comment in zfeature.c, a feature
 * is "enabled" when a reference count is stored on disk for it and "active"
 * when that reference count is greater than 0.
 */
extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
#ifdef __cplusplus
}
#endif
#endif /* _SYS_ZFEATURE_H */

View File

@ -21,6 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _ZIO_H
@ -270,6 +271,14 @@ typedef struct zbookmark {
#define ZB_ZIL_OBJECT (0ULL)
#define ZB_ZIL_LEVEL (-2LL)
/*
 * True if all four fields of the bookmark pointed to by 'zb' (objset,
 * object, level and blkid) are 0.  'zb' is evaluated more than once.
 */
#define ZB_IS_ZERO(zb) \
((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
/*
 * True if the bookmark pointed to by 'zb' has its object, level and blkid
 * equal to ZB_ROOT_OBJECT, ZB_ROOT_LEVEL and ZB_ROOT_BLKID respectively.
 * zb_objset is not examined.  'zb' is evaluated more than once.
 */
#define ZB_IS_ROOT(zb) \
((zb)->zb_object == ZB_ROOT_OBJECT && \
(zb)->zb_level == ZB_ROOT_LEVEL && \
(zb)->zb_blkid == ZB_ROOT_BLKID)
typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
@ -287,6 +296,7 @@ typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
typedef void zio_cksum_free_f(void *cbdata, size_t size);
struct zio_bad_cksum; /* defined in zio_checksum.h */
struct dnode_phys;
struct zio_cksum_report {
struct zio_cksum_report *zcr_next;
@ -559,6 +569,10 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);
/* zbookmark functions */
boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
const zbookmark_t *zb1, const zbookmark_t *zb2);
#ifdef __cplusplus
}
#endif

View File

@ -1329,7 +1329,8 @@ vdev_validate(vdev_t *vd, boolean_t strict)
uint64_t aux_guid = 0;
nvlist_t *nvl;
if ((label = vdev_label_read_config(vd)) == NULL) {
if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) ==
NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_BAD_LABEL);
return (0);
@ -1970,14 +1971,14 @@ vdev_validate_aux(vdev_t *vd)
if (!vdev_readable(vd))
return (0);
if ((label = vdev_label_read_config(vd)) == NULL) {
if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) {
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
VDEV_AUX_CORRUPT_DATA);
return (-1);
}
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
version > SPA_VERSION ||
!SPA_VERSION_IS_SUPPORTED(version) ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
guid != vd->vdev_guid ||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {

View File

@ -18,8 +18,10 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -121,6 +123,8 @@
* txg Transaction group in which this label was written
* pool_guid Unique identifier for this pool
* vdev_tree An nvlist describing vdev tree.
* features_for_read
* An nvlist of the features necessary for reading the MOS.
*
* Each leaf device label also contains the following:
*
@ -428,8 +432,13 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
}
/*
* Returns the configuration from the label of the given vdev. If 'label' is
* VDEV_BEST_LABEL, each label of the vdev will be read until a valid
* configuration is found; otherwise, only the specified label will be read.
*/
nvlist_t *
vdev_label_read_config(vdev_t *vd)
vdev_label_read_config(vdev_t *vd, int label)
{
spa_t *spa = vd->vdev_spa;
nvlist_t *config = NULL;
@ -447,6 +456,8 @@ vdev_label_read_config(vdev_t *vd)
retry:
for (int l = 0; l < VDEV_LABELS; l++) {
if (label >= 0 && label < VDEV_LABELS && label != l)
continue;
zio = zio_root(spa, NULL, NULL, flags);
@ -496,7 +507,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
/*
* Read the label, if any, and perform some basic sanity checks.
*/
if ((label = vdev_label_read_config(vd)) == NULL)
if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL)
return (B_FALSE);
(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
@ -833,7 +844,7 @@ retry:
* come back up, we fail to see the uberblock for txg + 1 because, say,
* it was on a mirrored device and the replica to which we wrote txg + 1
* is now offline. If we then make some changes and sync txg + 1, and then
* the missing replica comes back, then for a new seconds we'll have two
* the missing replica comes back, then for a few seconds we'll have two
* conflicting uberblocks on disk with the same txg. The solution is simple:
* among uberblocks with equal txg, choose the one with the latest timestamp.
*/
@ -853,46 +864,50 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
return (0);
}
/*
 * State shared by all uberblock reads issued on behalf of one
 * vdev_uberblock_load() call.  It is installed as the root zio's io_private;
 * vdev_uberblock_load_done() updates it while holding the root zio's
 * io_lock whenever it finds a better uberblock.
 *
 * ubl_vd is NULL until a usable uberblock has been found; ubl_label is only
 * meaningful once ubl_vd is non-NULL.
 */
struct ubl_cbdata {
uberblock_t *ubl_ubbest; /* Caller's buffer; holds the best uberblock so far */
vdev_t *ubl_vd; /* vdev the best uberblock was read from, or NULL */
int ubl_label; /* Label number (vdev_label_number()) it was read from */
};
static void
vdev_uberblock_load_done(zio_t *zio)
{
vdev_t *vd = zio->io_vd;
spa_t *spa = zio->io_spa;
zio_t *rio = zio->io_private;
uberblock_t *ub = zio->io_data;
uberblock_t *ubbest = rio->io_private;
struct ubl_cbdata *cbp = rio->io_private;
ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd));
if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
mutex_enter(&rio->io_lock);
if (ub->ub_txg <= spa->spa_load_max_txg &&
vdev_uberblock_compare(ub, ubbest) > 0)
*ubbest = *ub;
vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
/*
* Keep track of the vdev and label in which this
* uberblock was found. We will use this information
* later to obtain the config nvlist associated with
* this uberblock.
*/
*cbp->ubl_ubbest = *ub;
cbp->ubl_vd = vd;
cbp->ubl_label = vdev_label_number(vd->vdev_psize,
zio->io_offset);
}
mutex_exit(&rio->io_lock);
}
zio_buf_free(zio->io_data, zio->io_size);
}
void
vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
static void
vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
struct ubl_cbdata *cbp)
{
spa_t *spa = vd->vdev_spa;
vdev_t *rvd = spa->spa_root_vdev;
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
if (vd == rvd) {
ASSERT(zio == NULL);
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
zio = zio_root(spa, NULL, ubbest, flags);
bzero(ubbest, sizeof (uberblock_t));
}
ASSERT(zio != NULL);
for (int c = 0; c < vd->vdev_children; c++)
vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
for (int l = 0; l < VDEV_LABELS; l++) {
@ -905,11 +920,45 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
}
}
}
}
if (vd == rvd) {
(void) zio_wait(zio);
spa_config_exit(spa, SCL_ALL, FTAG);
/*
 * Find the best uberblock on disk and the label configuration that goes
 * with it.
 *
 * Every uberblock reachable from 'rvd' is read via
 * vdev_uberblock_load_impl(); the completion callback records the best one
 * in '*ub' together with the vdev and label number it came from.  If one
 * was found, labels on that vdev are then tried in the order
 * (label % 2), (label % 2) + 2, ... and the first configuration that can
 * be read is returned in '*config'.
 *
 * On return '*ub' is all zeroes if no usable uberblock was found, and
 * '*config' is NULL if no configuration could be read.  The whole
 * operation runs with SCL_ALL held as writer.
 */
void
vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
{
	spa_t *spa = rvd->vdev_spa;
	int zio_flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
	    ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
	struct ubl_cbdata cbdata;
	zio_t *rio;

	ASSERT(ub);
	ASSERT(config);

	/* Start from "nothing found" for both outputs. */
	bzero(ub, sizeof (uberblock_t));
	*config = NULL;

	cbdata.ubl_ubbest = ub;
	cbdata.ubl_vd = NULL;

	spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);

	rio = zio_root(spa, NULL, &cbdata, zio_flags);
	vdev_uberblock_load_impl(rio, rvd, zio_flags, &cbdata);
	(void) zio_wait(rio);

	/*
	 * ubl_label is only set together with ubl_vd, so it must not be
	 * looked at unless a best uberblock was actually recorded.
	 */
	if (cbdata.ubl_vd != NULL) {
		for (int l = cbdata.ubl_label % 2;
		    l < VDEV_LABELS && *config == NULL; l += 2)
			*config = vdev_label_read_config(cbdata.ubl_vd, l);
	}

	spa_config_exit(spa, SCL_ALL, FTAG);
}
/*

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
/*
@ -946,6 +947,19 @@ fzap_prefetch(zap_name_t *zn)
* Helper functions for consumers.
*/
/*
 * Create a new ZAP object of type 'ot' in 'os' and link it into the
 * existing ZAP object 'parent_obj' by adding an entry called 'name' whose
 * value is the new object's number (a single uint64_t).
 *
 * Returns the new object number.  Neither step reports an error to the
 * caller: both results are checked with VERIFY().
 */
uint64_t
zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
    const char *name, dmu_tx_t *tx)
{
	uint64_t zapobj = zap_create(os, ot, DMU_OT_NONE, 0, tx);

	VERIFY(zapobj > 0);
	VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &zapobj,
	    tx) == 0);

	return (zapobj);
}
int
zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
char *name)

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zio.h>
@ -472,7 +472,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
}
#endif
@ -596,7 +596,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
{
dmu_object_info_t doi;
dmu_object_info_from_db(db, &doi);
ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
}
#endif

View File

@ -0,0 +1,414 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
#include <sys/zfeature.h>
#include <sys/dmu.h>
#include <sys/nvpair.h>
#include <sys/zap.h>
#include <sys/dmu_tx.h>
#include "zfeature_common.h"
#include <sys/spa_impl.h>
/*
* ZFS Feature Flags
* -----------------
*
* ZFS feature flags are used to provide fine-grained versioning to the ZFS
* on-disk format. Once enabled on a pool, feature flags replace the old
* spa_version() number.
*
* Each new on-disk format change will be given a uniquely identifying string
* guid rather than a version number. This avoids the problem of different
* organizations creating new on-disk formats with the same version number. To
* keep feature guids unique they should consist of the reverse dns name of the
* organization which implemented the feature and a short name for the feature,
* separated by a colon (e.g. com.delphix:async_destroy).
*
* Reference Counts
* ----------------
*
* Within each pool features can be in one of three states: disabled, enabled,
* or active. These states are differentiated by a reference count stored on
* disk for each feature:
*
* 1) If there is no reference count stored on disk the feature is disabled.
* 2) If the reference count is 0 a system administrator has enabled the
* feature, but the feature has not been used yet, so no on-disk
* format changes have been made.
* 3) If the reference count is greater than 0 the feature is active.
* The format changes required by the feature are currently on disk.
* Note that if the feature's format changes are reversed the feature
* may choose to set its reference count back to 0.
*
* Feature flags makes no differentiation between non-zero reference counts
* for an active feature (e.g. a reference count of 1 means the same thing as a
* reference count of 27834721), but feature implementations may choose to use
* the reference count to store meaningful information. For example, a new RAID
* implementation might set the reference count to the number of vdevs using
* it. If all those disks are removed from the pool the feature goes back to
* having a reference count of 0.
*
* It is the responsibility of the individual features to maintain a non-zero
* reference count as long as the feature's format changes are present on disk.
*
* Dependencies
* ------------
*
* Each feature may depend on other features. The only effect of this
* relationship is that when a feature is enabled all of its dependencies are
* automatically enabled as well. Any future work to support disabling of
* features would need to ensure that features cannot be disabled if other
* enabled features depend on them.
*
* On-disk Format
* --------------
*
* When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES
* (5000). In order for this to work the pool is automatically upgraded to
* SPA_VERSION_BEFORE_FEATURES (28) first, so all pre-feature flags on disk
* format changes will be in use.
*
* Information about features is stored in 3 ZAP objects in the pool's MOS.
* These objects are linked to by the following names in the pool directory
* object:
*
* 1) features_for_read: feature guid -> reference count
* Features needed to open the pool for reading.
* 2) features_for_write: feature guid -> reference count
* Features needed to open the pool for writing.
* 3) feature_descriptions: feature guid -> descriptive string
* A human readable string.
*
* All enabled features appear in either features_for_read or
* features_for_write, but not both.
*
* To open a pool in read-only mode only the features listed in
* features_for_read need to be supported.
*
* To open the pool in read-write mode features in both features_for_read and
* features_for_write need to be supported.
*
* Some features may be required to read the ZAP objects containing feature
* information. To allow software to check for compatibility with these features
* before the pool is opened their names must be stored in the label in a
* new "features_for_read" entry (note that features that are only required
* to write to a pool never need to be stored in the label since the
* features_for_write ZAP object can be read before the pool is written to).
* To save space in the label features must be explicitly marked as needing to
* be written to the label. Also, reference counts are not stored in the label,
* instead any feature whose reference count drops to 0 is removed from the
* label.
*
* Adding New Features
* -------------------
*
* Features must be registered in zpool_feature_init() function in
* zfeature_common.c using the zfeature_register() function. This function
* has arguments to specify if the feature should be stored in the
* features_for_read or features_for_write ZAP object and if it needs to be
* written to the label when active.
*
* Once a feature is registered it will appear as a "feature@<feature name>"
* property which can be set by an administrator. Feature implementors should
* use the spa_feature_is_enabled() and spa_feature_is_active() functions to
* query the state of a feature and the spa_feature_incr() and
* spa_feature_decr() functions to change an enabled feature's reference count.
* Reference counts may only be updated in the syncing context.
*
* Features may not perform enable-time initialization. Instead, any such
* initialization should occur when the feature is first used. This design
* enforces that on-disk changes be made only when features are used. Code
* should only check if a feature is enabled using spa_feature_is_enabled(),
* not by relying on any feature specific metadata existing. If a feature is
* enabled, but the feature's metadata is not on disk yet then it should be
* created as needed.
*
* As an example, consider the com.delphix:async_destroy feature. This feature
* relies on the existence of a bptree in the MOS that stores blocks for
* asynchronous freeing. This bptree is not created when async_destroy is
* enabled. Instead, when a dataset is destroyed spa_feature_is_enabled() is
* called to check if async_destroy is enabled. If it is and the bptree object
* does not exist yet, the bptree object is created as part of the dataset
* destroy and async_destroy's reference count is incremented to indicate it
* has made an on-disk format change. Later, after the destroyed dataset's
* blocks have all been asynchronously freed there is no longer any use for the
* bptree object, so it is destroyed and async_destroy's reference count is
* decremented back to 0 to indicate that it has undone its on-disk format
* changes.
*/
/*
 * Operations that feature_do_action() can apply to a feature's on-disk
 * reference count.
 */
typedef enum {
/*
 * Create the reference count with value 0 if the feature has none yet
 * (a no-op if it already has one), after enabling its dependencies.
 */
FEATURE_ACTION_ENABLE,
/*
 * Add one to the reference count; ENOTSUP if the feature has no
 * reference count, EOVERFLOW if it is already UINT64_MAX.
 */
FEATURE_ACTION_INCR,
/*
 * Subtract one from the reference count; ENOTSUP if the feature has no
 * reference count, EOVERFLOW if it is already 0.
 */
FEATURE_ACTION_DECR,
} feature_action_t;
/*
 * Decide whether this software supports every feature that is active in
 * the feature ZAP object 'obj' of objset 'os'.
 *
 * Entries whose reference count is 0 are ignored.  Every remaining entry
 * is checked with zfeature_is_supported(); if any check fails the result
 * is B_FALSE.  When 'unsup_feat' is not NULL, each unsupported feature is
 * also added to it as name -> description, with the description taken
 * from the ZAP object 'desc_obj' or "" if it cannot be looked up there.
 *
 * The walk ends as soon as zap_cursor_retrieve() returns non-zero.
 */
boolean_t
feature_is_supported(objset_t *os, uint64_t obj, uint64_t desc_obj,
    nvlist_t *unsup_feat)
{
	boolean_t all_supported = B_TRUE;
	zap_cursor_t cursor;
	zap_attribute_t attr;

	for (zap_cursor_init(&cursor, os, obj);
	    zap_cursor_retrieve(&cursor, &attr) == 0;
	    zap_cursor_advance(&cursor)) {
		char descbuf[MAXPATHLEN];
		char *desc;

		ASSERT(attr.za_integer_length == sizeof (uint64_t) &&
		    attr.za_num_integers == 1);

		/* A reference count of 0 needs no support from us. */
		if (attr.za_first_integer == 0)
			continue;
		if (zfeature_is_supported(attr.za_name))
			continue;

		all_supported = B_FALSE;

		/* Caller only wants the verdict, not the list. */
		if (unsup_feat == NULL)
			continue;

		desc = (zap_lookup(os, desc_obj, attr.za_name,
		    1, sizeof (descbuf), descbuf) == 0) ? descbuf : "";
		VERIFY(nvlist_add_string(unsup_feat, attr.za_name,
		    desc) == 0);
	}
	zap_cursor_fini(&cursor);

	return (all_supported);
}
/*
 * Fetch the refcount stored for 'feature' into *res.
 *
 * Features with fi_can_readonly set are looked up in 'write_obj', all others
 * in 'read_obj'.  Returns 0 on success, ENOTSUP when zap_lookup() reports
 * ENOENT (the feature has no entry), or any other zap_lookup() error
 * unchanged.  *res is written only on success.
 */
static int
feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj,
    zfeature_info_t *feature, uint64_t *res)
{
	uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
	uint64_t value;
	int err;

	ASSERT(0 != zapobj);

	err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
	    &value);

	switch (err) {
	case 0:
		*res = value;
		return (0);
	case ENOENT:
		/* No entry means the feature was never enabled. */
		return (ENOTSUP);
	default:
		return (err);
	}
}
/*
 * Apply 'action' to the refcount entry of 'feature'.
 *
 * The entry lives in 'write_obj' when the feature has fi_can_readonly set
 * and in 'read_obj' otherwise.
 *
 *   FEATURE_ACTION_ENABLE  Does nothing if the entry already exists.
 *                          Otherwise enables every feature listed in
 *                          fi_depends (recursively), creates the entry with
 *                          a refcount of 0 and stores fi_desc under the same
 *                          key in 'desc_obj'.
 *   FEATURE_ACTION_INCR    Adds one to the refcount.  If the new value is 1
 *                          and fi_mos is set, spa_activate_mos_feature() is
 *                          called.
 *   FEATURE_ACTION_DECR    Subtracts one from the refcount.  If the new
 *                          value is 0, spa_deactivate_mos_feature() is
 *                          called.
 *
 * Returns 0 on success, ENOTSUP when INCR/DECR is requested for a feature
 * that has no entry, EOVERFLOW when the refcount is already at its limit
 * (UINT64_MAX for INCR, 0 for DECR), or the error of a failed zap_lookup()
 * or zap_update().
 */
static int
feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj,
    uint64_t desc_obj, zfeature_info_t *feature, feature_action_t action,
    dmu_tx_t *tx)
{
	uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
	uint64_t refcount;
	int error;

	ASSERT(0 != zapobj);
	ASSERT(zfeature_is_valid_guid(feature->fi_guid));

	error = zap_lookup(os, zapobj, feature->fi_guid,
	    sizeof (uint64_t), 1, &refcount);

	/*
	 * Only "found" (0) and "not found" (ENOENT) tell us the status of
	 * the feature; anything else is an I/O error.
	 */
	if (error != 0 && error != ENOENT)
		return (error);

	switch (action) {
	case FEATURE_ACTION_ENABLE: {
		int i;

		/* An existing entry means the feature is already enabled. */
		if (error != ENOENT)
			return (0);
		refcount = 0;

		/* Dependencies are enabled before the feature itself. */
		for (i = 0; feature->fi_depends[i] != NULL; i++) {
			error = feature_do_action(os, read_obj, write_obj,
			    desc_obj, feature->fi_depends[i],
			    FEATURE_ACTION_ENABLE, tx);
			if (error != 0)
				return (error);
		}
		break;
	}
	case FEATURE_ACTION_INCR:
		if (error == ENOENT)
			return (ENOTSUP);
		if (refcount == UINT64_MAX)
			return (EOVERFLOW);
		refcount += 1;
		break;
	case FEATURE_ACTION_DECR:
		if (error == ENOENT)
			return (ENOTSUP);
		if (refcount == 0)
			return (EOVERFLOW);
		refcount -= 1;
		break;
	default:
		ASSERT(0);
		break;
	}

	/* Persist the new refcount. */
	error = zap_update(os, zapobj, feature->fi_guid,
	    sizeof (uint64_t), 1, &refcount, tx);
	if (error != 0)
		return (error);

	/* Per-action follow-up work once the refcount is stored. */
	switch (action) {
	case FEATURE_ACTION_ENABLE:
		return (zap_update(os, desc_obj,
		    feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
		    feature->fi_desc, tx));
	case FEATURE_ACTION_INCR:
		if (refcount == 1 && feature->fi_mos) {
			spa_activate_mos_feature(dmu_objset_spa(os),
			    feature->fi_guid);
		}
		break;
	case FEATURE_ACTION_DECR:
		if (refcount == 0) {
			spa_deactivate_mos_feature(dmu_objset_spa(os),
			    feature->fi_guid);
		}
		break;
	default:
		break;
	}

	return (0);
}
/*
 * Create the three feature-flag ZAP objects (features for read, features for
 * write, feature descriptions), link each one into the pool directory object
 * and record the returned object numbers in the spa_t.
 */
void
spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
{
	objset_t *mos = spa->spa_meta_objset;

	/*
	 * The feature flags ZAP objects are created in exactly two
	 * situations: pool creation and pool upgrade.
	 */
	ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
	    tx->tx_txg == TXG_INITIAL));

	spa->spa_feat_for_read_obj = zap_create_link(mos,
	    DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_FEATURES_FOR_READ, tx);

	spa->spa_feat_for_write_obj = zap_create_link(mos,
	    DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_FEATURES_FOR_WRITE, tx);

	spa->spa_feat_desc_obj = zap_create_link(mos,
	    DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
	    DMU_POOL_FEATURE_DESCRIPTIONS, tx);
}
/*
* Enable the requested feature; feature_do_action() first enables any
* features listed in its fi_depends. The pool must already be at
* SPA_VERSION_FEATURES or later (asserted). This function returns nothing:
* any error from feature_do_action() trips the VERIFY3U below.
*/
void
spa_feature_enable(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
spa->spa_feat_desc_obj, feature, FEATURE_ACTION_ENABLE, tx));
}
/*
* Increment the refcount of the specified feature. This function is void:
* rather than returning an error, the VERIFY3U below fails if
* feature_do_action() reports ENOTSUP (the feature has not yet been enabled),
* EOVERFLOW (the refcount cannot be incremented) or any other error.
* This function must be called from syncing context.
*/
void
spa_feature_incr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
spa->spa_feat_desc_obj, feature, FEATURE_ACTION_INCR, tx));
}
/*
* Decrement the refcount of the specified feature. This function is void:
* rather than returning an error, the VERIFY3U below fails if
* feature_do_action() reports ENOTSUP (the feature has not yet been enabled),
* EOVERFLOW (the refcount is already 0) or any other error.
* This function must be called from syncing context.
*/
void
spa_feature_decr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
{
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
spa->spa_feat_desc_obj, feature, FEATURE_ACTION_DECR, tx));
}
/*
 * Report whether 'feature' is enabled on this pool, i.e. whether
 * feature_get_refcount() finds a refcount entry for it.  Pools older than
 * SPA_VERSION_FEATURES are reported as not having the feature enabled.
 */
boolean_t
spa_feature_is_enabled(spa_t *spa, zfeature_info_t *feature)
{
	uint64_t refcount;
	int err;

	if (spa_version(spa) >= SPA_VERSION_FEATURES) {
		err = feature_get_refcount(spa->spa_meta_objset,
		    spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
		    feature, &refcount);
		ASSERT(err == 0 || err == ENOTSUP);

		/* The value itself is irrelevant; only its presence counts. */
		if (err == 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}
/*
 * Report whether 'feature' is active on this pool, i.e. whether it has a
 * refcount entry whose value is greater than zero.  Pools older than
 * SPA_VERSION_FEATURES are reported as not having the feature active.
 */
boolean_t
spa_feature_is_active(spa_t *spa, zfeature_info_t *feature)
{
	uint64_t refcount;
	int err;

	if (spa_version(spa) < SPA_VERSION_FEATURES)
		return (B_FALSE);

	err = feature_get_refcount(spa->spa_meta_objset,
	    spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
	    feature, &refcount);
	ASSERT(err == 0 || err == ENOTSUP);

	/* refcount is only valid when the lookup succeeded. */
	if (err != 0)
		return (B_FALSE);

	return (refcount > 0);
}

View File

@ -18,6 +18,7 @@
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011-2012 Pawel Jakub Dawidek <pawel@dawidek.net>.
@ -1157,6 +1158,8 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
/*
* Find a zfsvfs_t for a mounted filesystem, or create our own, in which
* case its z_vfs will be NULL, and it will be opened as the owner.
* If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
* which prevents all vnode ops from running.
*/
static int
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
@ -1220,7 +1223,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
(void) nvlist_lookup_uint64(props,
zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
if (!SPA_VERSION_IS_SUPPORTED(version)) {
error = EINVAL;
goto pool_props_bad;
}
@ -1344,6 +1347,15 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
return (error);
}
/*
* inputs:
* zc_name name of the pool
*
* outputs:
* zc_cookie real errno
* zc_nvlist_dst config nvlist
* zc_nvlist_dst_size size of config nvlist
*/
static int
zfs_ioc_pool_stats(zfs_cmd_t *zc)
{
@ -1445,7 +1457,8 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
return (error);
if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
if (zc->zc_cookie < spa_version(spa) ||
!SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
spa_close(spa, FTAG);
return (EINVAL);
}
@ -5452,7 +5465,7 @@ zfs_modevent(module_t mod, int type, void *unused __unused)
tsd_create(&zfs_fsyncer_key, NULL);
tsd_create(&rrw_tsd_key, NULL);
printf("ZFS storage pool version " SPA_VERSION_STRING "\n");
printf("ZFS storage pool version: features support (" SPA_VERSION_STRING ")\n");
root_mount_rel(zfs_root_token);
zfsdev_init();

View File

@ -2278,7 +2278,7 @@ void
zfs_init(void)
{
printf("ZFS filesystem version " ZPL_VERSION_STRING "\n");
printf("ZFS filesystem version: " ZPL_VERSION_STRING "\n");
/*
* Initialize .zfs directory structures

View File

@ -640,7 +640,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
zp->zp_compress >= ZIO_COMPRESS_OFF &&
zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
zp->zp_type < DMU_OT_NUMTYPES &&
DMU_OT_IS_VALID(zp->zp_type) &&
zp->zp_level < 32 &&
zp->zp_copies > 0 &&
zp->zp_copies <= spa_max_replication(spa) &&
@ -924,7 +924,7 @@ zio_read_bp_init(zio_t *zio)
zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
}
if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
@ -3015,3 +3015,45 @@ static zio_pipe_stage_t *zio_pipeline[] = {
zio_checksum_verify,
zio_done
};
/*
* Returns B_TRUE when bookmark zb1 is considered to be before bookmark zb2.
* Both bookmarks must be in the same objset and zb2 must be a level-0
* bookmark (both asserted below). dnp is the dnode for zb1->zb_object; a
* NULL dnp yields B_FALSE unless zb2 refers to DMU_DEADLIST_OBJECT.
*/
boolean_t
zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
const zbookmark_t *zb2)
{
uint64_t zb1nextL0, zb2thisobj;
ASSERT(zb1->zb_objset == zb2->zb_objset);
ASSERT(zb2->zb_level == 0);
/*
* A bookmark in the deadlist is considered to be after
* everything else.
*/
if (zb2->zb_object == DMU_DEADLIST_OBJECT)
return (B_TRUE);
/* The objset_phys_t isn't before anything. */
if (dnp == NULL)
return (B_FALSE);
/*
* (blkid + 1) scaled by one (dn_indblkshift - SPA_BLKPTRSHIFT) shift per
* level of zb1 -- presumably the first level-0 block id past the range
* that zb1 covers; confirm against the callers.
*/
zb1nextL0 = (zb1->zb_blkid + 1) <<
((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
/*
* zb2's object number, or, when zb_object is 0, its block id shifted by
* (DNODE_BLOCK_SHIFT - DNODE_SHIFT) -- presumably the first object
* number stored in that block of the meta-dnode; confirm.
*/
zb2thisobj = zb2->zb_object ? zb2->zb_object :
zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
/*
* zb1 is in the meta-dnode: turn its next level-0 block id into an
* object number (block id * data block size in bytes >> DNODE_SHIFT)
* and compare that with zb2's object number.
*/
uint64_t nextobj = zb1nextL0 *
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
return (nextobj <= zb2thisobj);
}
/* Different objects: the object numbers alone decide. */
if (zb1->zb_object < zb2thisobj)
return (B_TRUE);
if (zb1->zb_object > zb2thisobj)
return (B_FALSE);
if (zb2->zb_object == DMU_META_DNODE_OBJECT)
return (B_FALSE);
/* Same object: compare block ids at level 0. */
return (zb1nextL0 <= zb2->zb_blkid);
}

View File

@ -172,6 +172,7 @@ typedef enum {
ZPOOL_PROP_READONLY,
ZPOOL_PROP_COMMENT,
ZPOOL_PROP_EXPANDSZ,
ZPOOL_PROP_FREEING,
ZPOOL_NUM_PROPS
} zpool_prop_t;
@ -245,6 +246,8 @@ const char *zpool_prop_to_name(zpool_prop_t);
const char *zpool_prop_default_string(zpool_prop_t);
uint64_t zpool_prop_default_numeric(zpool_prop_t);
boolean_t zpool_prop_readonly(zpool_prop_t);
boolean_t zpool_prop_feature(const char *);
boolean_t zpool_prop_unsupported(const char *name);
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
@ -352,6 +355,7 @@ typedef enum {
#define SPA_VERSION_26 26ULL
#define SPA_VERSION_27 27ULL
#define SPA_VERSION_28 28ULL
#define SPA_VERSION_5000 5000ULL
/*
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
@ -359,8 +363,8 @@ typedef enum {
* and do the appropriate changes. Also bump the version number in
* usr/src/grub/capability.
*/
#define SPA_VERSION SPA_VERSION_28
#define SPA_VERSION_STRING "28"
#define SPA_VERSION SPA_VERSION_5000
#define SPA_VERSION_STRING "5000"
/*
* Symbolic names for the changes that caused a SPA_VERSION switch.
@ -411,6 +415,12 @@ typedef enum {
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
#define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28
#define SPA_VERSION_FEATURES SPA_VERSION_5000
/*
* True when v lies in [SPA_VERSION_INITIAL, SPA_VERSION_BEFORE_FEATURES] or
* in [SPA_VERSION_FEATURES, SPA_VERSION]; version numbers in the gap between
* the two ranges are rejected. The argument is expanded several times, so it
* must not have side effects.
*/
#define SPA_VERSION_IS_SUPPORTED(v) \
(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
/*
* ZPL version - rev'd whenever an incompatible on-disk format change
@ -508,6 +518,11 @@ typedef struct zpool_rewind_policy {
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
#define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */
#define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */
#define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
/*
* The persistent vdev state is stored as separate values rather than a single
* 'vdev_state' entry. This is because a device can be in multiple states, such
@ -586,6 +601,7 @@ typedef enum vdev_aux {
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
VDEV_AUX_UNSUP_FEAT, /* unsupported features */
VDEV_AUX_SPARED, /* hot spare used in another pool */
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_NVPAIR_H
@ -274,6 +275,73 @@ int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
int nvpair_value_double(nvpair_t *, double *);
#endif
/*
* fnvlist_* and fnvpair_* interfaces. As the prototypes show, the add and
* remove routines return void and the lookup/value routines return the value
* itself, so none of them can report an error code to the caller.
* NOTE(review): presumably they assert or panic on failure -- confirm
* against the implementation (fnvpair.c).
* NOTE(review): fnvlist_lookup_uint8_t and fnvpair_value_uint8_t end in
* "_t", unlike their siblings; any rename must be made together with the
* definitions.
*/
nvlist_t *fnvlist_alloc(void);
void fnvlist_free(nvlist_t *);
size_t fnvlist_size(nvlist_t *);
char *fnvlist_pack(nvlist_t *, size_t *);
void fnvlist_pack_free(char *, size_t);
nvlist_t *fnvlist_unpack(char *, size_t);
nvlist_t *fnvlist_dup(nvlist_t *);
void fnvlist_merge(nvlist_t *, nvlist_t *);
void fnvlist_add_boolean(nvlist_t *, const char *);
void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
void fnvlist_add_string(nvlist_t *, const char *, const char *);
void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
void fnvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t);
void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
void fnvlist_remove(nvlist_t *, const char *);
void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
nvpair_t *fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name);
boolean_t fnvlist_lookup_boolean(nvlist_t *nvl, const char *name);
boolean_t fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name);
uchar_t fnvlist_lookup_byte(nvlist_t *nvl, const char *name);
int8_t fnvlist_lookup_int8(nvlist_t *nvl, const char *name);
int16_t fnvlist_lookup_int16(nvlist_t *nvl, const char *name);
int32_t fnvlist_lookup_int32(nvlist_t *nvl, const char *name);
int64_t fnvlist_lookup_int64(nvlist_t *nvl, const char *name);
uint8_t fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name);
uint16_t fnvlist_lookup_uint16(nvlist_t *nvl, const char *name);
uint32_t fnvlist_lookup_uint32(nvlist_t *nvl, const char *name);
uint64_t fnvlist_lookup_uint64(nvlist_t *nvl, const char *name);
char *fnvlist_lookup_string(nvlist_t *nvl, const char *name);
nvlist_t *fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name);
boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
uchar_t fnvpair_value_byte(nvpair_t *nvp);
int8_t fnvpair_value_int8(nvpair_t *nvp);
int16_t fnvpair_value_int16(nvpair_t *nvp);
int32_t fnvpair_value_int32(nvpair_t *nvp);
int64_t fnvpair_value_int64(nvpair_t *nvp);
uint8_t fnvpair_value_uint8_t(nvpair_t *nvp);
uint16_t fnvpair_value_uint16(nvpair_t *nvp);
uint32_t fnvpair_value_uint32(nvpair_t *nvp);
uint64_t fnvpair_value_uint64(nvpair_t *nvp);
char *fnvpair_value_string(nvpair_t *nvp);
nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
#ifdef __cplusplus
}
#endif

View File

@ -13,6 +13,7 @@ SRCS+= avl.c
.PATH: ${SUNW}/common/nvpair
SRCS+= nvpair.c
SRCS+= nvpair_alloc_fixed.c
SRCS+= fnvpair.c
.PATH: ${.CURDIR}/../../cddl/contrib/opensolaris/common/unicode
SRCS+= u8_textprep.c