Update vendor-sys/illumos/dist to pre libzfs_core state
illumos-gate revision 13742:b6bbdd77139c Obtained from: ssh://anonhg@hg.illumos.org/illumos-gate
This commit is contained in:
parent
4f8f62335a
commit
5bb19a17c0
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@ -372,7 +373,7 @@ access_mask_set(int haswriteperm, int hasreadperm, int isowner, int isallow)
|
||||
* by nfsace, assuming aclent_t -> nfsace semantics.
|
||||
*/
|
||||
static uint32_t
|
||||
mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow)
|
||||
mode_to_ace_access(mode_t mode, boolean_t isdir, int isowner, int isallow)
|
||||
{
|
||||
uint32_t access = 0;
|
||||
int haswriteperm = 0;
|
||||
@ -415,7 +416,7 @@ mode_to_ace_access(mode_t mode, int isdir, int isowner, int isallow)
|
||||
access |= ACE_DELETE_CHILD;
|
||||
}
|
||||
/* exec */
|
||||
if (mode & 01) {
|
||||
if (mode & S_IXOTH) {
|
||||
access |= ACE_EXECUTE;
|
||||
}
|
||||
|
||||
@ -666,7 +667,7 @@ out:
|
||||
}
|
||||
|
||||
static int
|
||||
convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir,
|
||||
convert_aent_to_ace(aclent_t *aclentp, int aclcnt, boolean_t isdir,
|
||||
ace_t **retacep, int *retacecnt)
|
||||
{
|
||||
ace_t *acep;
|
||||
@ -692,7 +693,7 @@ convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir,
|
||||
dfaclcnt = aclcnt - i;
|
||||
}
|
||||
|
||||
if (dfaclcnt && isdir == 0) {
|
||||
if (dfaclcnt && !isdir) {
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
@ -730,7 +731,7 @@ convert_aent_to_ace(aclent_t *aclentp, int aclcnt, int isdir,
|
||||
}
|
||||
|
||||
static int
|
||||
ace_mask_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
|
||||
ace_mask_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir)
|
||||
{
|
||||
int error = 0;
|
||||
o_mode_t mode = 0;
|
||||
@ -1027,7 +1028,7 @@ out:
|
||||
}
|
||||
|
||||
static int
|
||||
ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
|
||||
ace_allow_to_mode(uint32_t mask, o_mode_t *modep, boolean_t isdir)
|
||||
{
|
||||
/* ACE_READ_ACL and ACE_READ_ATTRIBUTES must both be set */
|
||||
if ((mask & (ACE_READ_ACL | ACE_READ_ATTRIBUTES)) !=
|
||||
@ -1040,7 +1041,7 @@ ace_allow_to_mode(uint32_t mask, o_mode_t *modep, int isdir)
|
||||
|
||||
static int
|
||||
acevals_to_aent(acevals_t *vals, aclent_t *dest, ace_list_t *list,
|
||||
uid_t owner, gid_t group, int isdir)
|
||||
uid_t owner, gid_t group, boolean_t isdir)
|
||||
{
|
||||
int error;
|
||||
uint32_t flips = ACE_POSIX_SUPPORTED_BITS;
|
||||
@ -1080,7 +1081,7 @@ out:
|
||||
|
||||
static int
|
||||
ace_list_to_aent(ace_list_t *list, aclent_t **aclentp, int *aclcnt,
|
||||
uid_t owner, gid_t group, int isdir)
|
||||
uid_t owner, gid_t group, boolean_t isdir)
|
||||
{
|
||||
int error = 0;
|
||||
aclent_t *aent, *result = NULL;
|
||||
@ -1260,7 +1261,7 @@ acevals_compare(const void *va, const void *vb)
|
||||
static int
|
||||
ln_ace_to_aent(ace_t *ace, int n, uid_t owner, gid_t group,
|
||||
aclent_t **aclentp, int *aclcnt, aclent_t **dfaclentp, int *dfaclcnt,
|
||||
int isdir)
|
||||
boolean_t isdir)
|
||||
{
|
||||
int error = 0;
|
||||
ace_t *acep;
|
||||
@ -1455,7 +1456,7 @@ out:
|
||||
}
|
||||
|
||||
static int
|
||||
convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir,
|
||||
convert_ace_to_aent(ace_t *acebufp, int acecnt, boolean_t isdir,
|
||||
uid_t owner, gid_t group, aclent_t **retaclentp, int *retaclcnt)
|
||||
{
|
||||
int error = 0;
|
||||
@ -1497,7 +1498,7 @@ convert_ace_to_aent(ace_t *acebufp, int acecnt, int isdir,
|
||||
|
||||
|
||||
int
|
||||
acl_translate(acl_t *aclp, int target_flavor, int isdir, uid_t owner,
|
||||
acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir, uid_t owner,
|
||||
gid_t group)
|
||||
{
|
||||
int aclcnt;
|
||||
@ -1568,101 +1569,105 @@ out:
|
||||
}
|
||||
|
||||
void
|
||||
acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
|
||||
uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone)
|
||||
acl_trivial_access_masks(mode_t mode, boolean_t isdir, trivial_acl_t *masks)
|
||||
{
|
||||
*deny1 = *deny2 = *allow0 = *group = 0;
|
||||
uint32_t read_mask = ACE_READ_DATA;
|
||||
uint32_t write_mask = ACE_WRITE_DATA|ACE_APPEND_DATA;
|
||||
uint32_t execute_mask = ACE_EXECUTE;
|
||||
|
||||
(void) isdir; /* will need this later */
|
||||
|
||||
masks->deny1 = 0;
|
||||
if (!(mode & S_IRUSR) && (mode & (S_IRGRP|S_IROTH)))
|
||||
*deny1 |= ACE_READ_DATA;
|
||||
masks->deny1 |= read_mask;
|
||||
if (!(mode & S_IWUSR) && (mode & (S_IWGRP|S_IWOTH)))
|
||||
*deny1 |= ACE_WRITE_DATA;
|
||||
masks->deny1 |= write_mask;
|
||||
if (!(mode & S_IXUSR) && (mode & (S_IXGRP|S_IXOTH)))
|
||||
*deny1 |= ACE_EXECUTE;
|
||||
masks->deny1 |= execute_mask;
|
||||
|
||||
masks->deny2 = 0;
|
||||
if (!(mode & S_IRGRP) && (mode & S_IROTH))
|
||||
*deny2 = ACE_READ_DATA;
|
||||
masks->deny2 |= read_mask;
|
||||
if (!(mode & S_IWGRP) && (mode & S_IWOTH))
|
||||
*deny2 |= ACE_WRITE_DATA;
|
||||
masks->deny2 |= write_mask;
|
||||
if (!(mode & S_IXGRP) && (mode & S_IXOTH))
|
||||
*deny2 |= ACE_EXECUTE;
|
||||
masks->deny2 |= execute_mask;
|
||||
|
||||
masks->allow0 = 0;
|
||||
if ((mode & S_IRUSR) && (!(mode & S_IRGRP) && (mode & S_IROTH)))
|
||||
*allow0 |= ACE_READ_DATA;
|
||||
masks->allow0 |= read_mask;
|
||||
if ((mode & S_IWUSR) && (!(mode & S_IWGRP) && (mode & S_IWOTH)))
|
||||
*allow0 |= ACE_WRITE_DATA;
|
||||
masks->allow0 |= write_mask;
|
||||
if ((mode & S_IXUSR) && (!(mode & S_IXGRP) && (mode & S_IXOTH)))
|
||||
*allow0 |= ACE_EXECUTE;
|
||||
masks->allow0 |= execute_mask;
|
||||
|
||||
*owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|
|
||||
masks->owner = ACE_WRITE_ATTRIBUTES|ACE_WRITE_OWNER|ACE_WRITE_ACL|
|
||||
ACE_WRITE_NAMED_ATTRS|ACE_READ_ACL|ACE_READ_ATTRIBUTES|
|
||||
ACE_READ_NAMED_ATTRS|ACE_SYNCHRONIZE;
|
||||
if (mode & S_IRUSR)
|
||||
*owner |= ACE_READ_DATA;
|
||||
masks->owner |= read_mask;
|
||||
if (mode & S_IWUSR)
|
||||
*owner |= ACE_WRITE_DATA|ACE_APPEND_DATA;
|
||||
masks->owner |= write_mask;
|
||||
if (mode & S_IXUSR)
|
||||
*owner |= ACE_EXECUTE;
|
||||
masks->owner |= execute_mask;
|
||||
|
||||
*group = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|
|
||||
masks->group = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS|
|
||||
ACE_SYNCHRONIZE;
|
||||
if (mode & S_IRGRP)
|
||||
*group |= ACE_READ_DATA;
|
||||
masks->group |= read_mask;
|
||||
if (mode & S_IWGRP)
|
||||
*group |= ACE_WRITE_DATA|ACE_APPEND_DATA;
|
||||
masks->group |= write_mask;
|
||||
if (mode & S_IXGRP)
|
||||
*group |= ACE_EXECUTE;
|
||||
masks->group |= execute_mask;
|
||||
|
||||
*everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES| ACE_READ_NAMED_ATTRS|
|
||||
masks->everyone = ACE_READ_ACL|ACE_READ_ATTRIBUTES|ACE_READ_NAMED_ATTRS|
|
||||
ACE_SYNCHRONIZE;
|
||||
if (mode & S_IROTH)
|
||||
*everyone |= ACE_READ_DATA;
|
||||
masks->everyone |= read_mask;
|
||||
if (mode & S_IWOTH)
|
||||
*everyone |= ACE_WRITE_DATA|ACE_APPEND_DATA;
|
||||
masks->everyone |= write_mask;
|
||||
if (mode & S_IXOTH)
|
||||
*everyone |= ACE_EXECUTE;
|
||||
masks->everyone |= execute_mask;
|
||||
}
|
||||
|
||||
int
|
||||
acl_trivial_create(mode_t mode, ace_t **acl, int *count)
|
||||
acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count)
|
||||
{
|
||||
uint32_t deny1, deny2;
|
||||
uint32_t allow0;
|
||||
uint32_t owner, group, everyone;
|
||||
int index = 0;
|
||||
int error;
|
||||
trivial_acl_t masks;
|
||||
|
||||
*count = 3;
|
||||
acl_trivial_access_masks(mode, &allow0, &deny1, &deny2, &owner, &group,
|
||||
&everyone);
|
||||
acl_trivial_access_masks(mode, isdir, &masks);
|
||||
|
||||
if (allow0)
|
||||
if (masks.allow0)
|
||||
(*count)++;
|
||||
if (deny1)
|
||||
if (masks.deny1)
|
||||
(*count)++;
|
||||
if (deny2)
|
||||
if (masks.deny2)
|
||||
(*count)++;
|
||||
|
||||
if ((error = cacl_malloc((void **)acl, *count * sizeof (ace_t))) != 0)
|
||||
return (error);
|
||||
|
||||
if (allow0) {
|
||||
SET_ACE(acl, index, -1, allow0, ACE_ACCESS_ALLOWED_ACE_TYPE,
|
||||
ACE_OWNER);
|
||||
if (masks.allow0) {
|
||||
SET_ACE(acl, index, -1, masks.allow0,
|
||||
ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
|
||||
}
|
||||
if (deny1) {
|
||||
SET_ACE(acl, index, -1, deny1, ACE_ACCESS_DENIED_ACE_TYPE,
|
||||
ACE_OWNER);
|
||||
if (masks.deny1) {
|
||||
SET_ACE(acl, index, -1, masks.deny1,
|
||||
ACE_ACCESS_DENIED_ACE_TYPE, ACE_OWNER);
|
||||
}
|
||||
if (deny2) {
|
||||
SET_ACE(acl, index, -1, deny2, ACE_ACCESS_DENIED_ACE_TYPE,
|
||||
ACE_GROUP|ACE_IDENTIFIER_GROUP);
|
||||
if (masks.deny2) {
|
||||
SET_ACE(acl, index, -1, masks.deny2,
|
||||
ACE_ACCESS_DENIED_ACE_TYPE, ACE_GROUP|ACE_IDENTIFIER_GROUP);
|
||||
}
|
||||
|
||||
SET_ACE(acl, index, -1, owner, ACE_ACCESS_ALLOWED_ACE_TYPE, ACE_OWNER);
|
||||
SET_ACE(acl, index, -1, group, ACE_ACCESS_ALLOWED_ACE_TYPE,
|
||||
SET_ACE(acl, index, -1, masks.owner, ACE_ACCESS_ALLOWED_ACE_TYPE,
|
||||
ACE_OWNER);
|
||||
SET_ACE(acl, index, -1, masks.group, ACE_ACCESS_ALLOWED_ACE_TYPE,
|
||||
ACE_IDENTIFIER_GROUP|ACE_GROUP);
|
||||
SET_ACE(acl, index, -1, everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
|
||||
SET_ACE(acl, index, -1, masks.everyone, ACE_ACCESS_ALLOWED_ACE_TYPE,
|
||||
ACE_EVERYONE);
|
||||
|
||||
return (0);
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ACL_COMMON_H
|
||||
@ -33,7 +34,14 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern ace_t trivial_acl[6];
|
||||
typedef struct trivial_acl {
|
||||
uint32_t allow0; /* allow mask for bits only in owner */
|
||||
uint32_t deny1; /* deny mask for bits not in owner */
|
||||
uint32_t deny2; /* deny mask for bits not in group */
|
||||
uint32_t owner; /* allow mask matching mode */
|
||||
uint32_t group; /* allow mask matching mode */
|
||||
uint32_t everyone; /* allow mask matching mode */
|
||||
} trivial_acl_t;
|
||||
|
||||
extern int acltrivial(const char *);
|
||||
extern void adjust_ace_pair(ace_t *pair, mode_t mode);
|
||||
@ -44,13 +52,13 @@ extern int ace_trivial_common(void *, int,
|
||||
uint32_t *mask));
|
||||
extern acl_t *acl_alloc(acl_type_t);
|
||||
extern void acl_free(acl_t *aclp);
|
||||
extern int acl_translate(acl_t *aclp, int target_flavor,
|
||||
int isdir, uid_t owner, gid_t group);
|
||||
extern int acl_translate(acl_t *aclp, int target_flavor, boolean_t isdir,
|
||||
uid_t owner, gid_t group);
|
||||
void ksort(caddr_t v, int n, int s, int (*f)());
|
||||
int cmp2acls(void *a, void *b);
|
||||
int acl_trivial_create(mode_t mode, ace_t **acl, int *count);
|
||||
void acl_trivial_access_masks(mode_t mode, uint32_t *allow0, uint32_t *deny1,
|
||||
uint32_t *deny2, uint32_t *owner, uint32_t *group, uint32_t *everyone);
|
||||
int acl_trivial_create(mode_t mode, boolean_t isdir, ace_t **acl, int *count);
|
||||
void acl_trivial_access_masks(mode_t mode, boolean_t isdir,
|
||||
trivial_acl_t *masks);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
496
common/nvpair/fnvpair.c
Normal file
496
common/nvpair/fnvpair.c
Normal file
@ -0,0 +1,496 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/debug.h>
|
||||
#ifndef _KERNEL
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
/*
|
||||
* "Force" nvlist wrapper.
|
||||
*
|
||||
* These functions wrap the nvlist_* functions with assertions that assume
|
||||
* the operation is successful. This allows the caller's code to be much
|
||||
* more readable, especially for the fnvlist_lookup_* and fnvpair_value_*
|
||||
* functions, which can return the requested value (rather than filling in
|
||||
* a pointer).
|
||||
*
|
||||
* These functions use NV_UNIQUE_NAME, encoding NV_ENCODE_NATIVE, and allocate
|
||||
* with KM_SLEEP.
|
||||
*
|
||||
* More wrappers should be added as needed -- for example
|
||||
* nvlist_lookup_*_array and nvpair_value_*_array.
|
||||
*/
|
||||
|
||||
nvlist_t *
|
||||
fnvlist_alloc(void)
|
||||
{
|
||||
nvlist_t *nvl;
|
||||
VERIFY3U(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP), ==, 0);
|
||||
return (nvl);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_free(nvlist_t *nvl)
|
||||
{
|
||||
nvlist_free(nvl);
|
||||
}
|
||||
|
||||
size_t
|
||||
fnvlist_size(nvlist_t *nvl)
|
||||
{
|
||||
size_t size;
|
||||
VERIFY3U(nvlist_size(nvl, &size, NV_ENCODE_NATIVE), ==, 0);
|
||||
return (size);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns allocated buffer of size *sizep. Caller must free the buffer with
|
||||
* fnvlist_pack_free().
|
||||
*/
|
||||
char *
|
||||
fnvlist_pack(nvlist_t *nvl, size_t *sizep)
|
||||
{
|
||||
char *packed = 0;
|
||||
VERIFY3U(nvlist_pack(nvl, &packed, sizep, NV_ENCODE_NATIVE,
|
||||
KM_SLEEP), ==, 0);
|
||||
return (packed);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
fnvlist_pack_free(char *pack, size_t size)
|
||||
{
|
||||
#ifdef _KERNEL
|
||||
kmem_free(pack, size);
|
||||
#else
|
||||
free(pack);
|
||||
#endif
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
fnvlist_unpack(char *buf, size_t buflen)
|
||||
{
|
||||
nvlist_t *rv;
|
||||
VERIFY3U(nvlist_unpack(buf, buflen, &rv, KM_SLEEP), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
fnvlist_dup(nvlist_t *nvl)
|
||||
{
|
||||
nvlist_t *rv;
|
||||
VERIFY3U(nvlist_dup(nvl, &rv, KM_SLEEP), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_merge(nvlist_t *dst, nvlist_t *src)
|
||||
{
|
||||
VERIFY3U(nvlist_merge(dst, src, KM_SLEEP), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_boolean(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
VERIFY3U(nvlist_add_boolean(nvl, name), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_boolean_value(nvlist_t *nvl, const char *name, boolean_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_boolean_value(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_byte(nvlist_t *nvl, const char *name, uchar_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_byte(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int8(nvlist_t *nvl, const char *name, int8_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int8(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint8(nvlist_t *nvl, const char *name, uint8_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint8(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int16(nvlist_t *nvl, const char *name, int16_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int16(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint16(nvlist_t *nvl, const char *name, uint16_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint16(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int32(nvlist_t *nvl, const char *name, int32_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int32(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint32(nvlist_t *nvl, const char *name, uint32_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint32(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int64(nvlist_t *nvl, const char *name, int64_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int64(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint64(nvlist_t *nvl, const char *name, uint64_t val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint64(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_string(nvlist_t *nvl, const char *name, const char *val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_string(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_nvlist(nvlist_t *nvl, const char *name, nvlist_t *val)
|
||||
{
|
||||
VERIFY3U(nvlist_add_nvlist(nvl, name, val), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_nvpair(nvlist_t *nvl, nvpair_t *pair)
|
||||
{
|
||||
VERIFY3U(nvlist_add_nvpair(nvl, pair), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_boolean_array(nvlist_t *nvl, const char *name,
|
||||
boolean_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_boolean_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_byte_array(nvlist_t *nvl, const char *name, uchar_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_byte_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int8_array(nvlist_t *nvl, const char *name, int8_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int8_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint8_array(nvlist_t *nvl, const char *name, uint8_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint8_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int16_array(nvlist_t *nvl, const char *name, int16_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int16_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint16_array(nvlist_t *nvl, const char *name,
|
||||
uint16_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint16_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int32_array(nvlist_t *nvl, const char *name, int32_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int32_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint32_array(nvlist_t *nvl, const char *name,
|
||||
uint32_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint32_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_int64_array(nvlist_t *nvl, const char *name, int64_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_int64_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_uint64_array(nvlist_t *nvl, const char *name,
|
||||
uint64_t *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_uint64_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_string_array(nvlist_t *nvl, const char *name,
|
||||
char * const *val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_string_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_add_nvlist_array(nvlist_t *nvl, const char *name,
|
||||
nvlist_t **val, uint_t n)
|
||||
{
|
||||
VERIFY3U(nvlist_add_nvlist_array(nvl, name, val, n), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_remove(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
VERIFY3U(nvlist_remove_all(nvl, name), ==, 0);
|
||||
}
|
||||
|
||||
void
|
||||
fnvlist_remove_nvpair(nvlist_t *nvl, nvpair_t *pair)
|
||||
{
|
||||
VERIFY3U(nvlist_remove_nvpair(nvl, pair), ==, 0);
|
||||
}
|
||||
|
||||
nvpair_t *
|
||||
fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
nvpair_t *rv;
|
||||
VERIFY3U(nvlist_lookup_nvpair(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
/* returns B_TRUE if the entry exists */
|
||||
boolean_t
|
||||
fnvlist_lookup_boolean(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
return (nvlist_lookup_boolean(nvl, name) == 0);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
boolean_t rv;
|
||||
VERIFY3U(nvlist_lookup_boolean_value(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uchar_t
|
||||
fnvlist_lookup_byte(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
uchar_t rv;
|
||||
VERIFY3U(nvlist_lookup_byte(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int8_t
|
||||
fnvlist_lookup_int8(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
int8_t rv;
|
||||
VERIFY3U(nvlist_lookup_int8(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int16_t
|
||||
fnvlist_lookup_int16(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
int16_t rv;
|
||||
VERIFY3U(nvlist_lookup_int16(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int32_t
|
||||
fnvlist_lookup_int32(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
int32_t rv;
|
||||
VERIFY3U(nvlist_lookup_int32(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int64_t
|
||||
fnvlist_lookup_int64(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
int64_t rv;
|
||||
VERIFY3U(nvlist_lookup_int64(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
uint8_t rv;
|
||||
VERIFY3U(nvlist_lookup_uint8(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint16_t
|
||||
fnvlist_lookup_uint16(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
uint16_t rv;
|
||||
VERIFY3U(nvlist_lookup_uint16(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
fnvlist_lookup_uint32(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
uint32_t rv;
|
||||
VERIFY3U(nvlist_lookup_uint32(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
fnvlist_lookup_uint64(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
uint64_t rv;
|
||||
VERIFY3U(nvlist_lookup_uint64(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
char *
|
||||
fnvlist_lookup_string(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
char *rv;
|
||||
VERIFY3U(nvlist_lookup_string(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name)
|
||||
{
|
||||
nvlist_t *rv;
|
||||
VERIFY3U(nvlist_lookup_nvlist(nvl, name, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
fnvpair_value_boolean_value(nvpair_t *nvp)
|
||||
{
|
||||
boolean_t rv;
|
||||
VERIFY3U(nvpair_value_boolean_value(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uchar_t
|
||||
fnvpair_value_byte(nvpair_t *nvp)
|
||||
{
|
||||
uchar_t rv;
|
||||
VERIFY3U(nvpair_value_byte(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int8_t
|
||||
fnvpair_value_int8(nvpair_t *nvp)
|
||||
{
|
||||
int8_t rv;
|
||||
VERIFY3U(nvpair_value_int8(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int16_t
|
||||
fnvpair_value_int16(nvpair_t *nvp)
|
||||
{
|
||||
int16_t rv;
|
||||
VERIFY3U(nvpair_value_int16(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int32_t
|
||||
fnvpair_value_int32(nvpair_t *nvp)
|
||||
{
|
||||
int32_t rv;
|
||||
VERIFY3U(nvpair_value_int32(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
int64_t
|
||||
fnvpair_value_int64(nvpair_t *nvp)
|
||||
{
|
||||
int64_t rv;
|
||||
VERIFY3U(nvpair_value_int64(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint8_t
|
||||
fnvpair_value_uint8_t(nvpair_t *nvp)
|
||||
{
|
||||
uint8_t rv;
|
||||
VERIFY3U(nvpair_value_uint8(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint16_t
|
||||
fnvpair_value_uint16(nvpair_t *nvp)
|
||||
{
|
||||
uint16_t rv;
|
||||
VERIFY3U(nvpair_value_uint16(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint32_t
|
||||
fnvpair_value_uint32(nvpair_t *nvp)
|
||||
{
|
||||
uint32_t rv;
|
||||
VERIFY3U(nvpair_value_uint32(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
fnvpair_value_uint64(nvpair_t *nvp)
|
||||
{
|
||||
uint64_t rv;
|
||||
VERIFY3U(nvpair_value_uint64(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
char *
|
||||
fnvpair_value_string(nvpair_t *nvp)
|
||||
{
|
||||
char *rv;
|
||||
VERIFY3U(nvpair_value_string(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
||||
|
||||
nvlist_t *
|
||||
fnvpair_value_nvlist(nvpair_t *nvp)
|
||||
{
|
||||
nvlist_t *rv;
|
||||
VERIFY3U(nvpair_value_nvlist(nvp, &rv), ==, 0);
|
||||
return (rv);
|
||||
}
|
156
common/zfs/zfeature_common.c
Normal file
156
common/zfs/zfeature_common.c
Normal file
@ -0,0 +1,156 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/systm.h>
|
||||
#else
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#endif
|
||||
#include <sys/debug.h>
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/types.h>
|
||||
#include "zfeature_common.h"
|
||||
|
||||
/*
|
||||
* Set to disable all feature checks while opening pools, allowing pools with
|
||||
* unsupported features to be opened. Set for testing only.
|
||||
*/
|
||||
boolean_t zfeature_checks_disable = B_FALSE;
|
||||
|
||||
zfeature_info_t spa_feature_table[SPA_FEATURES];
|
||||
|
||||
/*
|
||||
* Valid characters for feature guids. This list is mainly for aesthetic
|
||||
* purposes and could be expanded in the future. There are different allowed
|
||||
* characters in the guids reverse dns portion (before the colon) and its
|
||||
* short name (after the colon).
|
||||
*/
|
||||
static int
|
||||
valid_char(char c, boolean_t after_colon)
|
||||
{
|
||||
return ((c >= 'a' && c <= 'z') ||
|
||||
(c >= '0' && c <= '9') ||
|
||||
c == (after_colon ? '_' : '.'));
|
||||
}
|
||||
|
||||
/*
|
||||
* Every feature guid must contain exactly one colon which separates a reverse
|
||||
* dns organization name from the feature's "short" name (e.g.
|
||||
* "com.company:feature_name").
|
||||
*/
|
||||
boolean_t
|
||||
zfeature_is_valid_guid(const char *name)
|
||||
{
|
||||
int i;
|
||||
boolean_t has_colon = B_FALSE;
|
||||
|
||||
i = 0;
|
||||
while (name[i] != '\0') {
|
||||
char c = name[i++];
|
||||
if (c == ':') {
|
||||
if (has_colon)
|
||||
return (B_FALSE);
|
||||
has_colon = B_TRUE;
|
||||
continue;
|
||||
}
|
||||
if (!valid_char(c, has_colon))
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
return (has_colon);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
zfeature_is_supported(const char *guid)
|
||||
{
|
||||
if (zfeature_checks_disable)
|
||||
return (B_TRUE);
|
||||
|
||||
return (0 == zfeature_lookup_guid(guid, NULL));
|
||||
}
|
||||
|
||||
int
|
||||
zfeature_lookup_guid(const char *guid, zfeature_info_t **res)
|
||||
{
|
||||
for (int i = 0; i < SPA_FEATURES; i++) {
|
||||
zfeature_info_t *feature = &spa_feature_table[i];
|
||||
if (strcmp(guid, feature->fi_guid) == 0) {
|
||||
if (res != NULL)
|
||||
*res = feature;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
int
|
||||
zfeature_lookup_name(const char *name, zfeature_info_t **res)
|
||||
{
|
||||
for (int i = 0; i < SPA_FEATURES; i++) {
|
||||
zfeature_info_t *feature = &spa_feature_table[i];
|
||||
if (strcmp(name, feature->fi_uname) == 0) {
|
||||
if (res != NULL)
|
||||
*res = feature;
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
static void
|
||||
zfeature_register(int fid, const char *guid, const char *name, const char *desc,
|
||||
boolean_t readonly, boolean_t mos, zfeature_info_t **deps)
|
||||
{
|
||||
zfeature_info_t *feature = &spa_feature_table[fid];
|
||||
static zfeature_info_t *nodeps[] = { NULL };
|
||||
|
||||
ASSERT(name != NULL);
|
||||
ASSERT(desc != NULL);
|
||||
ASSERT(!readonly || !mos);
|
||||
ASSERT3U(fid, <, SPA_FEATURES);
|
||||
ASSERT(zfeature_is_valid_guid(guid));
|
||||
|
||||
if (deps == NULL)
|
||||
deps = nodeps;
|
||||
|
||||
feature->fi_guid = guid;
|
||||
feature->fi_uname = name;
|
||||
feature->fi_desc = desc;
|
||||
feature->fi_can_readonly = readonly;
|
||||
feature->fi_mos = mos;
|
||||
feature->fi_depends = deps;
|
||||
}
|
||||
|
||||
void
|
||||
zpool_feature_init(void)
|
||||
{
|
||||
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
|
||||
"com.delphix:async_destroy", "async_destroy",
|
||||
"Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
|
||||
}
|
71
common/zfs/zfeature_common.h
Normal file
71
common/zfs/zfeature_common.h
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZFEATURE_COMMON_H
|
||||
#define _ZFEATURE_COMMON_H
|
||||
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/inttypes.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct zfeature_info;
|
||||
|
||||
typedef struct zfeature_info {
|
||||
const char *fi_uname; /* User-facing feature name */
|
||||
const char *fi_guid; /* On-disk feature identifier */
|
||||
const char *fi_desc; /* Feature description */
|
||||
boolean_t fi_can_readonly; /* Can open pool readonly w/o support? */
|
||||
boolean_t fi_mos; /* Is the feature necessary to read the MOS? */
|
||||
struct zfeature_info **fi_depends; /* array; null terminated */
|
||||
} zfeature_info_t;
|
||||
|
||||
typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
|
||||
|
||||
#define ZFS_FEATURE_DEBUG
|
||||
|
||||
/*
 * Identifiers for the features known to this implementation.  SPA_FEATURES
 * is not a feature itself: it is the number of identifiers and is used to
 * size spa_feature_table[].
 *
 * This has to be a typedef.  Without the keyword, the declaration defines an
 * object named spa_feature_t in every file that includes this header, rather
 * than a type of that name.
 */
typedef enum spa_feature {
	SPA_FEATURE_ASYNC_DESTROY,
	SPA_FEATURES
} spa_feature_t;
|
||||
|
||||
extern zfeature_info_t spa_feature_table[SPA_FEATURES];
|
||||
|
||||
extern boolean_t zfeature_is_valid_guid(const char *);
|
||||
|
||||
extern boolean_t zfeature_is_supported(const char *);
|
||||
extern int zfeature_lookup_guid(const char *, zfeature_info_t **res);
|
||||
extern int zfeature_lookup_name(const char *, zfeature_info_t **res);
|
||||
|
||||
extern void zpool_feature_init(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFEATURE_COMMON_H */
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#if defined(_KERNEL)
|
||||
@ -60,7 +61,7 @@ zfs_deleg_perm_tab_t zfs_deleg_perm_tab[] = {
|
||||
{ZFS_DELEG_PERM_ROLLBACK, ZFS_DELEG_NOTE_ROLLBACK },
|
||||
{ZFS_DELEG_PERM_SNAPSHOT, ZFS_DELEG_NOTE_SNAPSHOT },
|
||||
{ZFS_DELEG_PERM_SHARE, ZFS_DELEG_NOTE_SHARE },
|
||||
{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_NONE },
|
||||
{ZFS_DELEG_PERM_SEND, ZFS_DELEG_NOTE_SEND },
|
||||
{ZFS_DELEG_PERM_USERPROP, ZFS_DELEG_NOTE_USERPROP },
|
||||
{ZFS_DELEG_PERM_USERQUOTA, ZFS_DELEG_NOTE_USERQUOTA },
|
||||
{ZFS_DELEG_PERM_GROUPQUOTA, ZFS_DELEG_NOTE_GROUPQUOTA },
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2010 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_DELEG_H
|
||||
@ -51,6 +52,7 @@ typedef enum {
|
||||
ZFS_DELEG_NOTE_CLONE,
|
||||
ZFS_DELEG_NOTE_PROMOTE,
|
||||
ZFS_DELEG_NOTE_RENAME,
|
||||
ZFS_DELEG_NOTE_SEND,
|
||||
ZFS_DELEG_NOTE_RECEIVE,
|
||||
ZFS_DELEG_NOTE_ALLOW,
|
||||
ZFS_DELEG_NOTE_USERPROP,
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
@ -104,6 +105,13 @@ zfs_prop_init(void)
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t acl_mode_table[] = {
|
||||
{ "discard", ZFS_ACL_DISCARD },
|
||||
{ "groupmask", ZFS_ACL_GROUPMASK },
|
||||
{ "passthrough", ZFS_ACL_PASSTHROUGH },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static zprop_index_t acl_inherit_table[] = {
|
||||
{ "discard", ZFS_ACL_DISCARD },
|
||||
{ "noallow", ZFS_ACL_NOALLOW },
|
||||
@ -207,6 +215,9 @@ zfs_prop_init(void)
|
||||
zprop_register_index(ZFS_PROP_SNAPDIR, "snapdir", ZFS_SNAPDIR_HIDDEN,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"hidden | visible", "SNAPDIR", snapdir_table);
|
||||
zprop_register_index(ZFS_PROP_ACLMODE, "aclmode", ZFS_ACL_DISCARD,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"discard | groupmask | passthrough", "ACLMODE", acl_mode_table);
|
||||
zprop_register_index(ZFS_PROP_ACLINHERIT, "aclinherit",
|
||||
ZFS_ACL_RESTRICTED, PROP_INHERIT, ZFS_TYPE_FILESYSTEM,
|
||||
"discard | noallow | restricted | passthrough | passthrough-x",
|
||||
@ -256,7 +267,7 @@ zfs_prop_init(void)
|
||||
/* default index properties */
|
||||
zprop_register_index(ZFS_PROP_VERSION, "version", 0, PROP_DEFAULT,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_SNAPSHOT,
|
||||
"1 | 2 | 3 | 4 | current", "VERSION", version_table);
|
||||
"1 | 2 | 3 | 4 | 5 | current", "VERSION", version_table);
|
||||
zprop_register_index(ZFS_PROP_CANMOUNT, "canmount", ZFS_CANMOUNT_ON,
|
||||
PROP_DEFAULT, ZFS_TYPE_FILESYSTEM, "on | off | noauto",
|
||||
"CANMOUNT", canmount_table);
|
||||
@ -286,6 +297,8 @@ zfs_prop_init(void)
|
||||
/* string properties */
|
||||
zprop_register_string(ZFS_PROP_ORIGIN, "origin", NULL, PROP_READONLY,
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<snapshot>", "ORIGIN");
|
||||
zprop_register_string(ZFS_PROP_CLONES, "clones", NULL, PROP_READONLY,
|
||||
ZFS_TYPE_SNAPSHOT, "<dataset>[,...]", "CLONES");
|
||||
zprop_register_string(ZFS_PROP_MOUNTPOINT, "mountpoint", "/",
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM, "<path> | legacy | none",
|
||||
"MOUNTPOINT");
|
||||
@ -311,6 +324,9 @@ zfs_prop_init(void)
|
||||
zprop_register_number(ZFS_PROP_COMPRESSRATIO, "compressratio", 0,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
"<1.00x or higher if compressed>", "RATIO");
|
||||
zprop_register_number(ZFS_PROP_REFRATIO, "refcompressratio", 0,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
"<1.00x or higher if compressed>", "REFRATIO");
|
||||
zprop_register_number(ZFS_PROP_VOLBLOCKSIZE, "volblocksize",
|
||||
ZVOL_DEFAULT_BLOCKSIZE, PROP_ONETIME,
|
||||
ZFS_TYPE_VOLUME, "512 to 128k, power of 2", "VOLBLOCK");
|
||||
@ -328,6 +344,8 @@ zfs_prop_init(void)
|
||||
ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME, "<size>", "USEDREFRESERV");
|
||||
zprop_register_number(ZFS_PROP_USERREFS, "userrefs", 0, PROP_READONLY,
|
||||
ZFS_TYPE_SNAPSHOT, "<count>", "USERREFS");
|
||||
zprop_register_number(ZFS_PROP_WRITTEN, "written", 0, PROP_READONLY,
|
||||
ZFS_TYPE_DATASET, "<size>", "WRITTEN");
|
||||
|
||||
/* default number properties */
|
||||
zprop_register_number(ZFS_PROP_QUOTA, "quota", 0, PROP_DEFAULT,
|
||||
@ -370,13 +388,6 @@ zfs_prop_init(void)
|
||||
zprop_register_hidden(ZFS_PROP_OBJSETID, "objsetid", PROP_TYPE_NUMBER,
|
||||
PROP_READONLY, ZFS_TYPE_DATASET, "OBJSETID");
|
||||
|
||||
/*
|
||||
* Property to be removed once libbe is integrated
|
||||
*/
|
||||
zprop_register_hidden(ZFS_PROP_PRIVATE, "priv_prop",
|
||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_FILESYSTEM,
|
||||
"PRIV_PROP");
|
||||
|
||||
/* oddball properties */
|
||||
zprop_register_impl(ZFS_PROP_CREATION, "creation", PROP_TYPE_NUMBER, 0,
|
||||
NULL, PROP_READONLY, ZFS_TYPE_DATASET,
|
||||
@ -460,6 +471,18 @@ zfs_prop_userquota(const char *name)
|
||||
return (B_FALSE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if this is a valid written@ property.
|
||||
* Note that after the @, any character is valid (eg, another @, for
|
||||
* written@pool/fs@origin).
|
||||
*/
|
||||
boolean_t
|
||||
zfs_prop_written(const char *name)
|
||||
{
|
||||
static const char *prefix = "written@";
|
||||
return (strncmp(name, prefix, strlen(prefix)) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Tables of index types, plus functions to convert between the user view
|
||||
* (strings) and internal representation (uint64_t).
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zio.h>
|
||||
@ -69,14 +71,20 @@ zpool_prop_init(void)
|
||||
ZFS_TYPE_POOL, "<filesystem>", "BOOTFS");
|
||||
zprop_register_string(ZPOOL_PROP_CACHEFILE, "cachefile", NULL,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<file> | none", "CACHEFILE");
|
||||
zprop_register_string(ZPOOL_PROP_COMMENT, "comment", NULL,
|
||||
PROP_DEFAULT, ZFS_TYPE_POOL, "<comment-string>", "COMMENT");
|
||||
|
||||
/* readonly number properties */
|
||||
zprop_register_number(ZPOOL_PROP_SIZE, "size", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "SIZE");
|
||||
zprop_register_number(ZPOOL_PROP_FREE, "free", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "FREE");
|
||||
zprop_register_number(ZPOOL_PROP_FREEING, "freeing", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "FREEING");
|
||||
zprop_register_number(ZPOOL_PROP_ALLOCATED, "allocated", 0,
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "ALLOC");
|
||||
zprop_register_number(ZPOOL_PROP_EXPANDSZ, "expandsize", 0,
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "<size>", "EXPANDSZ");
|
||||
zprop_register_number(ZPOOL_PROP_CAPACITY, "capacity", 0, PROP_READONLY,
|
||||
ZFS_TYPE_POOL, "<size>", "CAP");
|
||||
zprop_register_number(ZPOOL_PROP_GUID, "guid", 0, PROP_READONLY,
|
||||
@ -160,6 +168,26 @@ zpool_prop_default_numeric(zpool_prop_t prop)
|
||||
return (zpool_prop_table[prop].pd_numdefault);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if this is a valid feature@ property.
|
||||
*/
|
||||
boolean_t
|
||||
zpool_prop_feature(const char *name)
|
||||
{
|
||||
static const char *prefix = "feature@";
|
||||
return (strncmp(name, prefix, strlen(prefix)) == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if this is a valid unsupported@ property.
|
||||
*/
|
||||
boolean_t
|
||||
zpool_prop_unsupported(const char *name)
|
||||
{
|
||||
static const char *prefix = "unsupported@";
|
||||
return (strncmp(name, prefix, strlen(prefix)) == 0);
|
||||
}
|
||||
|
||||
int
|
||||
zpool_prop_string_to_index(zpool_prop_t prop, const char *string,
|
||||
uint64_t *index)
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
#
|
||||
# Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
# Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
# Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
#
|
||||
|
||||
#
|
||||
@ -190,7 +192,6 @@ GENUNIX_OBJS += \
|
||||
gid.o \
|
||||
groups.o \
|
||||
grow.o \
|
||||
hat.o \
|
||||
hat_refmod.o \
|
||||
id32.o \
|
||||
id_space.o \
|
||||
@ -242,6 +243,7 @@ GENUNIX_OBJS += \
|
||||
nvpair.o \
|
||||
nvpair_alloc_system.o \
|
||||
nvpair_alloc_fixed.o \
|
||||
fnvpair.o \
|
||||
octet.o \
|
||||
open.o \
|
||||
p_online.o \
|
||||
@ -453,6 +455,8 @@ AUDIO810_OBJS += audio810.o
|
||||
|
||||
AUDIOCMI_OBJS += audiocmi.o
|
||||
|
||||
AUDIOCMIHD_OBJS += audiocmihd.o
|
||||
|
||||
AUDIOHD_OBJS += audiohd.o
|
||||
|
||||
AUDIOIXP_OBJS += audioixp.o
|
||||
@ -498,9 +502,9 @@ MD4_OBJS += md4.o md4_mod.o
|
||||
|
||||
MD5_OBJS += md5.o md5_mod.o
|
||||
|
||||
SHA1_OBJS += sha1.o sha1_mod.o fips_sha1_util.o
|
||||
SHA1_OBJS += sha1.o sha1_mod.o
|
||||
|
||||
SHA2_OBJS += sha2.o sha2_mod.o fips_sha2_util.o
|
||||
SHA2_OBJS += sha2.o sha2_mod.o
|
||||
|
||||
IPGPC_OBJS += classifierddi.o classifier.o filters.o trie.o table.o \
|
||||
ba_table.o
|
||||
@ -935,7 +939,7 @@ ST_OBJS += st.o st_conf.o
|
||||
|
||||
EMLXS_OBJS += emlxs_clock.o emlxs_dfc.o emlxs_dhchap.o emlxs_diag.o \
|
||||
emlxs_download.o emlxs_dump.o emlxs_els.o emlxs_event.o \
|
||||
emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \
|
||||
emlxs_fcf.o emlxs_fcp.o emlxs_fct.o emlxs_hba.o emlxs_ip.o \
|
||||
emlxs_mbox.o emlxs_mem.o emlxs_msg.o emlxs_node.o \
|
||||
emlxs_pkt.o emlxs_sli3.o emlxs_sli4.o emlxs_solaris.o \
|
||||
emlxs_thread.o
|
||||
@ -1083,7 +1087,7 @@ DRM_OBJS += drm_sunmod.o drm_kstat.o drm_agpsupport.o \
|
||||
drm_auth.o drm_bufs.o drm_context.o drm_dma.o \
|
||||
drm_drawable.o drm_drv.o drm_fops.o drm_ioctl.o drm_irq.o \
|
||||
drm_lock.o drm_memory.o drm_msg.o drm_pci.o drm_scatter.o \
|
||||
drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o
|
||||
drm_cache.o drm_gem.o drm_mm.o ati_pcigart.o
|
||||
|
||||
FM_OBJS += devfm.o devfm_machdep.o
|
||||
|
||||
@ -1325,6 +1329,7 @@ ZFS_COMMON_OBJS += \
|
||||
arc.o \
|
||||
bplist.o \
|
||||
bpobj.o \
|
||||
bptree.o \
|
||||
dbuf.o \
|
||||
ddt.o \
|
||||
ddt_zap.o \
|
||||
@ -1346,6 +1351,7 @@ ZFS_COMMON_OBJS += \
|
||||
dsl_deleg.o \
|
||||
dsl_prop.o \
|
||||
dsl_scan.o \
|
||||
zfeature.o \
|
||||
gzip.o \
|
||||
lzjb.o \
|
||||
metaslab.o \
|
||||
@ -1388,11 +1394,12 @@ ZFS_COMMON_OBJS += \
|
||||
zrlock.o
|
||||
|
||||
ZFS_SHARED_OBJS += \
|
||||
zfs_namecheck.o \
|
||||
zfs_deleg.o \
|
||||
zfs_prop.o \
|
||||
zfeature_common.o \
|
||||
zfs_comutil.o \
|
||||
zfs_deleg.o \
|
||||
zfs_fletcher.o \
|
||||
zfs_namecheck.o \
|
||||
zfs_prop.o \
|
||||
zpool_prop.o \
|
||||
zprop_common.o
|
||||
|
||||
@ -1519,7 +1526,7 @@ KCF_OBJS += kcf.o kcf_callprov.o kcf_cbufcall.o kcf_cipher.o kcf_crypto.o \
|
||||
kcf_object.o kcf_policy.o kcf_prov_lib.o kcf_prov_tabs.o \
|
||||
kcf_sched.o kcf_session.o kcf_sign.o kcf_spi.o kcf_verify.o \
|
||||
kcf_random.o modes.o ecb.o cbc.o ctr.o ccm.o gcm.o \
|
||||
fips_random.o fips_checksum.o fips_test_vectors.o
|
||||
fips_random.o
|
||||
|
||||
CRYPTOADM_OBJS += cryptoadm.o
|
||||
|
||||
@ -1530,7 +1537,7 @@ DPROV_OBJS += dprov.o
|
||||
DCA_OBJS += dca.o dca_3des.o dca_debug.o dca_dsa.o dca_kstat.o dca_rng.o \
|
||||
dca_rsa.o
|
||||
|
||||
AESPROV_OBJS += aes.o aes_impl.o aes_modes.o fips_aes_util.o
|
||||
AESPROV_OBJS += aes.o aes_impl.o aes_modes.o
|
||||
|
||||
ARCFOURPROV_OBJS += arcfour.o arcfour_crypt.o
|
||||
|
||||
@ -1541,16 +1548,16 @@ ECCPROV_OBJS += ecc.o ec.o ec2_163.o ec2_mont.o ecdecode.o ecl_mult.o \
|
||||
ecp_jm.o ec2_233.o ecl_curve.o ecp_224.o ecp_aff.o \
|
||||
ecp_mont.o ec2_aff.o ec_naf.o ecl_gf.o ecp_256.o mp_gf2m.o \
|
||||
mpi.o mplogic.o mpmontg.o mpprime.o oid.o \
|
||||
secitem.o ec2_test.o ecp_test.o fips_ecc_util.o
|
||||
secitem.o ec2_test.o ecp_test.o
|
||||
|
||||
RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o fips_rsa_util.o
|
||||
RSAPROV_OBJS += rsa.o rsa_impl.o pkcs1.o
|
||||
|
||||
SWRANDPROV_OBJS += swrand.o fips_random_util.o
|
||||
SWRANDPROV_OBJS += swrand.o
|
||||
|
||||
#
|
||||
# kernel SSL
|
||||
#
|
||||
KSSL_OBJS += kssl.o ksslioctl.o
|
||||
KSSL_OBJS += kssl.o ksslioctl.o
|
||||
|
||||
KSSL_SOCKFIL_MOD_OBJS += ksslfilter.o ksslapi.o ksslrec.o
|
||||
|
||||
@ -1664,7 +1671,7 @@ KGSS_KRB5_OBJS += krb5mech.o \
|
||||
$(CRYPTO_OLD) \
|
||||
$(CRYPTO_RAW) $(K5_KRB) $(K5_OS)
|
||||
|
||||
DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o fips_des_util.o
|
||||
DES_OBJS += des_crypt.o des_impl.o des_ks.o des_soft.o
|
||||
|
||||
DLBOOT_OBJS += bootparam_xdr.o nfs_dlinet.o scan.o
|
||||
|
||||
@ -1763,6 +1770,8 @@ BGE_OBJS += bge_main2.o bge_chip2.o bge_kstats.o bge_log.o bge_ndd.o \
|
||||
|
||||
DMFE_OBJS += dmfe_log.o dmfe_main.o dmfe_mii.o
|
||||
|
||||
EFE_OBJS += efe.o
|
||||
|
||||
ELXL_OBJS += elxl.o
|
||||
|
||||
HME_OBJS += hme.o
|
||||
@ -1773,6 +1782,8 @@ IXGB_OBJS += ixgb.o ixgb_atomic.o ixgb_chip.o ixgb_gld.o ixgb_kstats.o \
|
||||
NGE_OBJS += nge_main.o nge_atomic.o nge_chip.o nge_ndd.o nge_kstats.o \
|
||||
nge_log.o nge_rx.o nge_tx.o nge_xmii.o
|
||||
|
||||
PCN_OBJS += pcn.o
|
||||
|
||||
RGE_OBJS += rge_main.o rge_chip.o rge_ndd.o rge_kstats.o rge_log.o rge_rxtx.o
|
||||
|
||||
URTW_OBJS += urtw.o
|
||||
@ -1897,6 +1908,11 @@ IGB_OBJS = igb_82575.o igb_api.o igb_mac.o igb_manage.o \
|
||||
igb_debug.o igb_gld.o igb_log.o igb_main.o \
|
||||
igb_rx.o igb_stat.o igb_tx.o
|
||||
|
||||
#
|
||||
# Intel Pro/100 NIC driver module
|
||||
#
|
||||
IPRB_OBJS = iprb.o
|
||||
|
||||
#
|
||||
# Intel 10GbE PCIE NIC driver module
|
||||
#
|
||||
@ -1932,11 +1948,6 @@ NXGE_HCALL_OBJS = \
|
||||
#
|
||||
KICONV_EMEA_OBJS += kiconv_emea.o
|
||||
|
||||
#
|
||||
# blk2scsa
|
||||
#
|
||||
BLK2SCSA_OBJS = blk2scsa.o
|
||||
|
||||
KICONV_JA_OBJS += kiconv_ja.o
|
||||
|
||||
KICONV_KO_OBJS += kiconv_cck_common.o kiconv_ko.o
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -24,6 +24,9 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/atomic.h>
|
||||
#include <sys/errno.h>
|
||||
@ -273,7 +276,7 @@ fasttrap_pid_cleanup_cb(void *data)
|
||||
fasttrap_provider_t **fpp, *fp;
|
||||
fasttrap_bucket_t *bucket;
|
||||
dtrace_provider_id_t provid;
|
||||
int i, later;
|
||||
int i, later, rval;
|
||||
|
||||
static volatile int in = 0;
|
||||
ASSERT(in == 0);
|
||||
@ -335,9 +338,13 @@ fasttrap_pid_cleanup_cb(void *data)
|
||||
* clean out the unenabled probes.
|
||||
*/
|
||||
provid = fp->ftp_provid;
|
||||
if (dtrace_unregister(provid) != 0) {
|
||||
if ((rval = dtrace_unregister(provid)) != 0) {
|
||||
if (fasttrap_total > fasttrap_max / 2)
|
||||
(void) dtrace_condense(provid);
|
||||
|
||||
if (rval == EAGAIN)
|
||||
fp->ftp_marked = 1;
|
||||
|
||||
later += fp->ftp_marked;
|
||||
fpp = &fp->ftp_next;
|
||||
} else {
|
||||
@ -363,12 +370,16 @@ fasttrap_pid_cleanup_cb(void *data)
|
||||
* get a chance to do that work if and when the timeout is reenabled
|
||||
* (if detach fails).
|
||||
*/
|
||||
if (later > 0 && fasttrap_timeout != (timeout_id_t)1)
|
||||
fasttrap_timeout = timeout(&fasttrap_pid_cleanup_cb, NULL, hz);
|
||||
else if (later > 0)
|
||||
if (later > 0) {
|
||||
if (fasttrap_timeout != (timeout_id_t)1) {
|
||||
fasttrap_timeout =
|
||||
timeout(&fasttrap_pid_cleanup_cb, NULL, hz);
|
||||
}
|
||||
|
||||
fasttrap_cleanup_work = 1;
|
||||
else
|
||||
} else {
|
||||
fasttrap_timeout = 0;
|
||||
}
|
||||
|
||||
mutex_exit(&fasttrap_cleanup_mtx);
|
||||
in = 0;
|
||||
|
@ -23,6 +23,9 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/errno.h>
|
||||
#include <sys/stat.h>
|
||||
@ -408,9 +411,25 @@ profile_disable(void *arg, dtrace_id_t id, void *parg)
|
||||
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
profile_usermode(void *arg, dtrace_id_t id, void *parg)
|
||||
profile_mode(void *arg, dtrace_id_t id, void *parg)
|
||||
{
|
||||
return (CPU->cpu_profile_pc == 0);
|
||||
profile_probe_t *prof = parg;
|
||||
int mode;
|
||||
|
||||
if (CPU->cpu_profile_pc != 0) {
|
||||
mode = DTRACE_MODE_KERNEL;
|
||||
} else {
|
||||
mode = DTRACE_MODE_USER;
|
||||
}
|
||||
|
||||
if (prof->prof_kind == PROF_TICK) {
|
||||
mode |= DTRACE_MODE_NOPRIV_RESTRICT;
|
||||
} else {
|
||||
ASSERT(prof->prof_kind == PROF_PROFILE);
|
||||
mode |= DTRACE_MODE_NOPRIV_DROP;
|
||||
}
|
||||
|
||||
return (mode);
|
||||
}
|
||||
|
||||
static dtrace_pattr_t profile_attr = {
|
||||
@ -430,7 +449,7 @@ static dtrace_pops_t profile_pops = {
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
profile_usermode,
|
||||
profile_mode,
|
||||
profile_destroy
|
||||
};
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -78,9 +80,9 @@
|
||||
* types of locks: 1) the hash table lock array, and 2) the
|
||||
* arc list locks.
|
||||
*
|
||||
* Buffers do not have their own mutexs, rather they rely on the
|
||||
* hash table mutexs for the bulk of their protection (i.e. most
|
||||
* fields in the arc_buf_hdr_t are protected by these mutexs).
|
||||
* Buffers do not have their own mutexes, rather they rely on the
|
||||
* hash table mutexes for the bulk of their protection (i.e. most
|
||||
* fields in the arc_buf_hdr_t are protected by these mutexes).
|
||||
*
|
||||
* buf_hash_find() returns the appropriate mutex (held) when it
|
||||
* locates the requested buffer in the hash table. It returns
|
||||
@ -1217,7 +1219,7 @@ arc_buf_alloc(spa_t *spa, int size, void *tag, arc_buf_contents_t type)
|
||||
ASSERT(BUF_EMPTY(hdr));
|
||||
hdr->b_size = size;
|
||||
hdr->b_type = type;
|
||||
hdr->b_spa = spa_guid(spa);
|
||||
hdr->b_spa = spa_load_guid(spa);
|
||||
hdr->b_state = arc_anon;
|
||||
hdr->b_arc_access = 0;
|
||||
buf = kmem_cache_alloc(buf_cache, KM_PUSHPAGE);
|
||||
@ -1919,7 +1921,7 @@ arc_flush(spa_t *spa)
|
||||
uint64_t guid = 0;
|
||||
|
||||
if (spa)
|
||||
guid = spa_guid(spa);
|
||||
guid = spa_load_guid(spa);
|
||||
|
||||
while (list_head(&arc_mru->arcs_list[ARC_BUFC_DATA])) {
|
||||
(void) arc_evict(arc_mru, guid, -1, FALSE, ARC_BUFC_DATA);
|
||||
@ -1980,6 +1982,11 @@ arc_shrink(void)
|
||||
arc_adjust();
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine if the system is under memory pressure and is asking
|
||||
* to reclaim memory. A return value of 1 indicates that the system
|
||||
* is under memory pressure and that the arc should adjust accordingly.
|
||||
*/
|
||||
static int
|
||||
arc_reclaim_needed(void)
|
||||
{
|
||||
@ -2027,11 +2034,24 @@ arc_reclaim_needed(void)
|
||||
* heap is allocated. (Or, in the calculation, if less than 1/4th is
|
||||
* free)
|
||||
*/
|
||||
if (btop(vmem_size(heap_arena, VMEM_FREE)) <
|
||||
(btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2))
|
||||
if (vmem_size(heap_arena, VMEM_FREE) <
|
||||
(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2))
|
||||
return (1);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If zio data pages are being allocated out of a separate heap segment,
|
||||
* then enforce that the size of available vmem for this arena remains
|
||||
* above about 1/16th free.
|
||||
*
|
||||
* Note: The 1/16th arena free requirement was put in place
|
||||
* to aggressively evict memory from the arc in order to avoid
|
||||
* memory fragmentation issues.
|
||||
*/
|
||||
if (zio_arena != NULL &&
|
||||
vmem_size(zio_arena, VMEM_FREE) <
|
||||
(vmem_size(zio_arena, VMEM_ALLOC) >> 4))
|
||||
return (1);
|
||||
#else
|
||||
if (spa_get_random(100) == 0)
|
||||
return (1);
|
||||
@ -2083,6 +2103,13 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
|
||||
}
|
||||
kmem_cache_reap_now(buf_cache);
|
||||
kmem_cache_reap_now(hdr_cache);
|
||||
|
||||
/*
|
||||
* Ask the vmem arena to reclaim unused memory from its
|
||||
* quantum caches.
|
||||
*/
|
||||
if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
|
||||
vmem_qcache_reap(zio_arena);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -2216,18 +2243,6 @@ arc_evict_needed(arc_buf_contents_t type)
|
||||
if (type == ARC_BUFC_METADATA && arc_meta_used >= arc_meta_limit)
|
||||
return (1);
|
||||
|
||||
#ifdef _KERNEL
|
||||
/*
|
||||
* If zio data pages are being allocated out of a separate heap segment,
|
||||
* then enforce that the size of available vmem for this area remains
|
||||
* above about 1/32nd free.
|
||||
*/
|
||||
if (type == ARC_BUFC_DATA && zio_arena != NULL &&
|
||||
vmem_size(zio_arena, VMEM_FREE) <
|
||||
(vmem_size(zio_arena, VMEM_ALLOC) >> 5))
|
||||
return (1);
|
||||
#endif
|
||||
|
||||
if (arc_reclaim_needed())
|
||||
return (1);
|
||||
|
||||
@ -2532,9 +2547,11 @@ arc_read_done(zio_t *zio)
|
||||
callback_list = hdr->b_acb;
|
||||
ASSERT(callback_list != NULL);
|
||||
if (BP_SHOULD_BYTESWAP(zio->io_bp) && zio->io_error == 0) {
|
||||
dmu_object_byteswap_t bswap =
|
||||
DMU_OT_BYTESWAP(BP_GET_TYPE(zio->io_bp));
|
||||
arc_byteswap_func_t *func = BP_GET_LEVEL(zio->io_bp) > 0 ?
|
||||
byteswap_uint64_array :
|
||||
dmu_ot[BP_GET_TYPE(zio->io_bp)].ot_byteswap;
|
||||
dmu_ot_byteswap[bswap].ob_func;
|
||||
func(buf->b_data, hdr->b_size);
|
||||
}
|
||||
|
||||
@ -2619,7 +2636,7 @@ arc_read_done(zio_t *zio)
|
||||
}
|
||||
|
||||
/*
|
||||
* "Read" the block block at the specified DVA (in bp) via the
|
||||
* "Read" the block at the specified DVA (in bp) via the
|
||||
* cache. If the block is found in the cache, invoke the provided
|
||||
* callback immediately and return. Note that the `zio' parameter
|
||||
* in the callback will be NULL in this case, since no IO was
|
||||
@ -2676,7 +2693,7 @@ arc_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||
arc_buf_t *buf;
|
||||
kmutex_t *hash_lock;
|
||||
zio_t *rzio;
|
||||
uint64_t guid = spa_guid(spa);
|
||||
uint64_t guid = spa_load_guid(spa);
|
||||
|
||||
top:
|
||||
hdr = buf_hash_find(guid, BP_IDENTITY(bp), BP_PHYSICAL_BIRTH(bp),
|
||||
@ -4234,7 +4251,7 @@ l2arc_write_buffers(spa_t *spa, l2arc_dev_t *dev, uint64_t target_sz)
|
||||
boolean_t have_lock, full;
|
||||
l2arc_write_callback_t *cb;
|
||||
zio_t *pio, *wzio;
|
||||
uint64_t guid = spa_guid(spa);
|
||||
uint64_t guid = spa_load_guid(spa);
|
||||
|
||||
ASSERT(dev->l2ad_vdev != NULL);
|
||||
|
||||
|
@ -20,11 +20,13 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/bpobj.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
|
||||
uint64_t
|
||||
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
|
||||
@ -440,7 +442,10 @@ space_range_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
struct space_range_arg *sra = arg;
|
||||
|
||||
if (bp->blk_birth > sra->mintxg && bp->blk_birth <= sra->maxtxg) {
|
||||
sra->used += bp_get_dsize_sync(sra->spa, bp);
|
||||
if (dsl_pool_sync_context(spa_get_dsl(sra->spa)))
|
||||
sra->used += bp_get_dsize_sync(sra->spa, bp);
|
||||
else
|
||||
sra->used += bp_get_dsize(sra->spa, bp);
|
||||
sra->comp += BP_GET_PSIZE(bp);
|
||||
sra->uncomp += BP_GET_UCSIZE(bp);
|
||||
}
|
||||
|
224
uts/common/fs/zfs/bptree.c
Normal file
224
uts/common/fs/zfs/bptree.c
Normal file
@ -0,0 +1,224 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/arc.h>
|
||||
#include <sys/bptree.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/dmu_traverse.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/dsl_dir.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/spa.h>
|
||||
|
||||
/*
|
||||
* A bptree is a queue of root block pointers from destroyed datasets. When a
|
||||
* dataset is destroyed its root block pointer is put on the end of the pool's
|
||||
* bptree queue so the dataset's blocks can be freed asynchronously by
|
||||
* dsl_scan_sync. This allows the delete operation to finish without traversing
|
||||
* all the dataset's blocks.
|
||||
*
|
||||
* Note that while bt_begin and bt_end are only ever incremented in this code
|
||||
* they are effectively reset to 0 every time the entire bptree is freed because
|
||||
* the bptree's object is destroyed and re-created.
|
||||
*/
|
||||
|
||||
struct bptree_args {
|
||||
bptree_phys_t *ba_phys; /* data in bonus buffer, dirtied if freeing */
|
||||
boolean_t ba_free; /* true if freeing during traversal */
|
||||
|
||||
bptree_itor_t *ba_func; /* function to call for each blockpointer */
|
||||
void *ba_arg; /* caller supplied argument to ba_func */
|
||||
dmu_tx_t *ba_tx; /* caller supplied tx, NULL if not freeing */
|
||||
} bptree_args_t;
|
||||
|
||||
uint64_t
|
||||
bptree_alloc(objset_t *os, dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t obj;
|
||||
dmu_buf_t *db;
|
||||
bptree_phys_t *bt;
|
||||
|
||||
obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
|
||||
SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
|
||||
sizeof (bptree_phys_t), tx);
|
||||
|
||||
/*
|
||||
* Bonus buffer contents are already initialized to 0, but for
|
||||
* readability we make it explicit.
|
||||
*/
|
||||
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
bt = db->db_data;
|
||||
bt->bt_begin = 0;
|
||||
bt->bt_end = 0;
|
||||
bt->bt_bytes = 0;
|
||||
bt->bt_comp = 0;
|
||||
bt->bt_uncomp = 0;
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
return (obj);
|
||||
}
|
||||
|
||||
int
|
||||
bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
bptree_phys_t *bt;
|
||||
|
||||
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
|
||||
bt = db->db_data;
|
||||
ASSERT3U(bt->bt_begin, ==, bt->bt_end);
|
||||
ASSERT3U(bt->bt_bytes, ==, 0);
|
||||
ASSERT3U(bt->bt_comp, ==, 0);
|
||||
ASSERT3U(bt->bt_uncomp, ==, 0);
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
return (dmu_object_free(os, obj, tx));
|
||||
}
|
||||
|
||||
void
|
||||
bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
|
||||
uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
bptree_phys_t *bt;
|
||||
bptree_entry_phys_t bte;
|
||||
|
||||
/*
|
||||
* bptree objects are in the pool mos, therefore they can only be
|
||||
* modified in syncing context. Furthermore, this is only modified
|
||||
* by the sync thread, so no locking is necessary.
|
||||
*/
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
VERIFY3U(0, ==, dmu_bonus_hold(os, obj, FTAG, &db));
|
||||
bt = db->db_data;
|
||||
|
||||
bte.be_birth_txg = birth_txg;
|
||||
bte.be_bp = *bp;
|
||||
bzero(&bte.be_zb, sizeof (bte.be_zb));
|
||||
dmu_write(os, obj, bt->bt_end * sizeof (bte), sizeof (bte), &bte, tx);
|
||||
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
bt->bt_end++;
|
||||
bt->bt_bytes += bytes;
|
||||
bt->bt_comp += comp;
|
||||
bt->bt_uncomp += uncomp;
|
||||
dmu_buf_rele(db, FTAG);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
bptree_visit_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
int err;
|
||||
struct bptree_args *ba = arg;
|
||||
|
||||
if (bp == NULL)
|
||||
return (0);
|
||||
|
||||
err = ba->ba_func(ba->ba_arg, bp, ba->ba_tx);
|
||||
if (err == 0 && ba->ba_free) {
|
||||
ba->ba_phys->bt_bytes -= bp_get_dsize_sync(spa, bp);
|
||||
ba->ba_phys->bt_comp -= BP_GET_PSIZE(bp);
|
||||
ba->ba_phys->bt_uncomp -= BP_GET_UCSIZE(bp);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
|
||||
void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
int err;
|
||||
uint64_t i;
|
||||
dmu_buf_t *db;
|
||||
struct bptree_args ba;
|
||||
|
||||
ASSERT(!free || dmu_tx_is_syncing(tx));
|
||||
|
||||
err = dmu_bonus_hold(os, obj, FTAG, &db);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
if (free)
|
||||
dmu_buf_will_dirty(db, tx);
|
||||
|
||||
ba.ba_phys = db->db_data;
|
||||
ba.ba_free = free;
|
||||
ba.ba_func = func;
|
||||
ba.ba_arg = arg;
|
||||
ba.ba_tx = tx;
|
||||
|
||||
err = 0;
|
||||
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
|
||||
bptree_entry_phys_t bte;
|
||||
|
||||
ASSERT(!free || i == ba.ba_phys->bt_begin);
|
||||
|
||||
err = dmu_read(os, obj, i * sizeof (bte), sizeof (bte),
|
||||
&bte, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
break;
|
||||
|
||||
err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
|
||||
bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST,
|
||||
bptree_visit_cb, &ba);
|
||||
if (free) {
|
||||
ASSERT(err == 0 || err == ERESTART);
|
||||
if (err != 0) {
|
||||
/* save bookmark for future resume */
|
||||
ASSERT3U(bte.be_zb.zb_objset, ==,
|
||||
ZB_DESTROYED_OBJSET);
|
||||
ASSERT3U(bte.be_zb.zb_level, ==, 0);
|
||||
dmu_write(os, obj, i * sizeof (bte),
|
||||
sizeof (bte), &bte, tx);
|
||||
break;
|
||||
} else {
|
||||
ba.ba_phys->bt_begin++;
|
||||
(void) dmu_free_range(os, obj,
|
||||
i * sizeof (bte), sizeof (bte), tx);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT(!free || err != 0 || ba.ba_phys->bt_begin == ba.ba_phys->bt_end);
|
||||
|
||||
/* if all blocks are free there should be no used space */
|
||||
if (ba.ba_phys->bt_begin == ba.ba_phys->bt_end) {
|
||||
ASSERT3U(ba.ba_phys->bt_bytes, ==, 0);
|
||||
ASSERT3U(ba.ba_phys->bt_comp, ==, 0);
|
||||
ASSERT3U(ba.ba_phys->bt_uncomp, ==, 0);
|
||||
}
|
||||
|
||||
dmu_buf_rele(db, FTAG);
|
||||
|
||||
return (err);
|
||||
}
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -226,7 +228,7 @@ dbuf_is_metadata(dmu_buf_impl_t *db)
|
||||
boolean_t is_metadata;
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
is_metadata = dmu_ot[DB_DNODE(db)->dn_type].ot_metadata;
|
||||
is_metadata = DMU_OT_IS_METADATA(DB_DNODE(db)->dn_type);
|
||||
DB_DNODE_EXIT(db);
|
||||
|
||||
return (is_metadata);
|
||||
@ -1300,13 +1302,17 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
* it, since one of the current holders may be in the
|
||||
* middle of an update. Note that users of dbuf_undirty()
|
||||
* should not place a hold on the dbuf before the call.
|
||||
* Also note: we can get here with a spill block, so
|
||||
* test for that similar to how dbuf_dirty does.
|
||||
*/
|
||||
if (refcount_count(&db->db_holds) > db->db_dirtycnt) {
|
||||
mutex_exit(&db->db_mtx);
|
||||
/* Make sure we don't toss this buffer at sync phase */
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
dnode_clear_range(dn, db->db_blkid, 1, tx);
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
if (db->db_blkid != DMU_SPILL_BLKID) {
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
dnode_clear_range(dn, db->db_blkid, 1, tx);
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
}
|
||||
DB_DNODE_EXIT(db);
|
||||
return (0);
|
||||
}
|
||||
@ -1319,11 +1325,18 @@ dbuf_undirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
|
||||
*drp = dr->dr_next;
|
||||
|
||||
/*
|
||||
* Note that there are three places in dbuf_dirty()
|
||||
* where this dirty record may be put on a list.
|
||||
* Make sure to do a list_remove corresponding to
|
||||
* every one of those list_insert calls.
|
||||
*/
|
||||
if (dr->dr_parent) {
|
||||
mutex_enter(&dr->dr_parent->dt.di.dr_mtx);
|
||||
list_remove(&dr->dr_parent->dt.di.dr_children, dr);
|
||||
mutex_exit(&dr->dr_parent->dt.di.dr_mtx);
|
||||
} else if (db->db_level+1 == dn->dn_nlevels) {
|
||||
} else if (db->db_blkid == DMU_SPILL_BLKID ||
|
||||
db->db_level+1 == dn->dn_nlevels) {
|
||||
ASSERT(db->db_blkptr == NULL || db->db_parent == dn->dn_dbuf);
|
||||
mutex_enter(&dn->dn_mtx);
|
||||
list_remove(&dn->dn_dirty_records[txg & TXG_MASK], dr);
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -1061,11 +1062,9 @@ ddt_sync_table(ddt_t *ddt, dmu_tx_t *tx, uint64_t txg)
|
||||
ASSERT(spa->spa_uberblock.ub_version >= SPA_VERSION_DEDUP);
|
||||
|
||||
if (spa->spa_ddt_stat_object == 0) {
|
||||
spa->spa_ddt_stat_object = zap_create(ddt->ddt_os,
|
||||
DMU_OT_DDT_STATS, DMU_OT_NONE, 0, tx);
|
||||
VERIFY(zap_add(ddt->ddt_os, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_DDT_STATS, sizeof (uint64_t), 1,
|
||||
&spa->spa_ddt_stat_object, tx) == 0);
|
||||
spa->spa_ddt_stat_object = zap_create_link(ddt->ddt_os,
|
||||
DMU_OT_DDT_STATS, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_DDT_STATS, tx);
|
||||
}
|
||||
|
||||
while ((dde = avl_destroy_nodes(&ddt->ddt_tree, &cookie)) != NULL) {
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
@ -46,60 +47,73 @@
|
||||
#endif
|
||||
|
||||
const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES] = {
|
||||
{ byteswap_uint8_array, TRUE, "unallocated" },
|
||||
{ zap_byteswap, TRUE, "object directory" },
|
||||
{ byteswap_uint64_array, TRUE, "object array" },
|
||||
{ byteswap_uint8_array, TRUE, "packed nvlist" },
|
||||
{ byteswap_uint64_array, TRUE, "packed nvlist size" },
|
||||
{ byteswap_uint64_array, TRUE, "bpobj" },
|
||||
{ byteswap_uint64_array, TRUE, "bpobj header" },
|
||||
{ byteswap_uint64_array, TRUE, "SPA space map header" },
|
||||
{ byteswap_uint64_array, TRUE, "SPA space map" },
|
||||
{ byteswap_uint64_array, TRUE, "ZIL intent log" },
|
||||
{ dnode_buf_byteswap, TRUE, "DMU dnode" },
|
||||
{ dmu_objset_byteswap, TRUE, "DMU objset" },
|
||||
{ byteswap_uint64_array, TRUE, "DSL directory" },
|
||||
{ zap_byteswap, TRUE, "DSL directory child map"},
|
||||
{ zap_byteswap, TRUE, "DSL dataset snap map" },
|
||||
{ zap_byteswap, TRUE, "DSL props" },
|
||||
{ byteswap_uint64_array, TRUE, "DSL dataset" },
|
||||
{ zfs_znode_byteswap, TRUE, "ZFS znode" },
|
||||
{ zfs_oldacl_byteswap, TRUE, "ZFS V0 ACL" },
|
||||
{ byteswap_uint8_array, FALSE, "ZFS plain file" },
|
||||
{ zap_byteswap, TRUE, "ZFS directory" },
|
||||
{ zap_byteswap, TRUE, "ZFS master node" },
|
||||
{ zap_byteswap, TRUE, "ZFS delete queue" },
|
||||
{ byteswap_uint8_array, FALSE, "zvol object" },
|
||||
{ zap_byteswap, TRUE, "zvol prop" },
|
||||
{ byteswap_uint8_array, FALSE, "other uint8[]" },
|
||||
{ byteswap_uint64_array, FALSE, "other uint64[]" },
|
||||
{ zap_byteswap, TRUE, "other ZAP" },
|
||||
{ zap_byteswap, TRUE, "persistent error log" },
|
||||
{ byteswap_uint8_array, TRUE, "SPA history" },
|
||||
{ byteswap_uint64_array, TRUE, "SPA history offsets" },
|
||||
{ zap_byteswap, TRUE, "Pool properties" },
|
||||
{ zap_byteswap, TRUE, "DSL permissions" },
|
||||
{ zfs_acl_byteswap, TRUE, "ZFS ACL" },
|
||||
{ byteswap_uint8_array, TRUE, "ZFS SYSACL" },
|
||||
{ byteswap_uint8_array, TRUE, "FUID table" },
|
||||
{ byteswap_uint64_array, TRUE, "FUID table size" },
|
||||
{ zap_byteswap, TRUE, "DSL dataset next clones"},
|
||||
{ zap_byteswap, TRUE, "scan work queue" },
|
||||
{ zap_byteswap, TRUE, "ZFS user/group used" },
|
||||
{ zap_byteswap, TRUE, "ZFS user/group quota" },
|
||||
{ zap_byteswap, TRUE, "snapshot refcount tags"},
|
||||
{ zap_byteswap, TRUE, "DDT ZAP algorithm" },
|
||||
{ zap_byteswap, TRUE, "DDT statistics" },
|
||||
{ byteswap_uint8_array, TRUE, "System attributes" },
|
||||
{ zap_byteswap, TRUE, "SA master node" },
|
||||
{ zap_byteswap, TRUE, "SA attr registration" },
|
||||
{ zap_byteswap, TRUE, "SA attr layouts" },
|
||||
{ zap_byteswap, TRUE, "scan translations" },
|
||||
{ byteswap_uint8_array, FALSE, "deduplicated block" },
|
||||
{ zap_byteswap, TRUE, "DSL deadlist map" },
|
||||
{ byteswap_uint64_array, TRUE, "DSL deadlist map hdr" },
|
||||
{ zap_byteswap, TRUE, "DSL dir clones" },
|
||||
{ byteswap_uint64_array, TRUE, "bpobj subobj" },
|
||||
{ DMU_BSWAP_UINT8, TRUE, "unallocated" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "object directory" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "object array" },
|
||||
{ DMU_BSWAP_UINT8, TRUE, "packed nvlist" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "packed nvlist size" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "bpobj" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "bpobj header" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "SPA space map header" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "SPA space map" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "ZIL intent log" },
|
||||
{ DMU_BSWAP_DNODE, TRUE, "DMU dnode" },
|
||||
{ DMU_BSWAP_OBJSET, TRUE, "DMU objset" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "DSL directory" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL directory child map"},
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL dataset snap map" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL props" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "DSL dataset" },
|
||||
{ DMU_BSWAP_ZNODE, TRUE, "ZFS znode" },
|
||||
{ DMU_BSWAP_OLDACL, TRUE, "ZFS V0 ACL" },
|
||||
{ DMU_BSWAP_UINT8, FALSE, "ZFS plain file" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "ZFS directory" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "ZFS master node" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "ZFS delete queue" },
|
||||
{ DMU_BSWAP_UINT8, FALSE, "zvol object" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "zvol prop" },
|
||||
{ DMU_BSWAP_UINT8, FALSE, "other uint8[]" },
|
||||
{ DMU_BSWAP_UINT64, FALSE, "other uint64[]" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "other ZAP" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "persistent error log" },
|
||||
{ DMU_BSWAP_UINT8, TRUE, "SPA history" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "SPA history offsets" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "Pool properties" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL permissions" },
|
||||
{ DMU_BSWAP_ACL, TRUE, "ZFS ACL" },
|
||||
{ DMU_BSWAP_UINT8, TRUE, "ZFS SYSACL" },
|
||||
{ DMU_BSWAP_UINT8, TRUE, "FUID table" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "FUID table size" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL dataset next clones"},
|
||||
{ DMU_BSWAP_ZAP, TRUE, "scan work queue" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group used" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "ZFS user/group quota" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "snapshot refcount tags"},
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DDT ZAP algorithm" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DDT statistics" },
|
||||
{ DMU_BSWAP_UINT8, TRUE, "System attributes" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "SA master node" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "SA attr registration" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "SA attr layouts" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "scan translations" },
|
||||
{ DMU_BSWAP_UINT8, FALSE, "deduplicated block" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL deadlist map" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "DSL deadlist map hdr" },
|
||||
{ DMU_BSWAP_ZAP, TRUE, "DSL dir clones" },
|
||||
{ DMU_BSWAP_UINT64, TRUE, "bpobj subobj" }
|
||||
};
|
||||
|
||||
const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
|
||||
{ byteswap_uint8_array, "uint8" },
|
||||
{ byteswap_uint16_array, "uint16" },
|
||||
{ byteswap_uint32_array, "uint32" },
|
||||
{ byteswap_uint64_array, "uint64" },
|
||||
{ zap_byteswap, "zap" },
|
||||
{ dnode_buf_byteswap, "dnode" },
|
||||
{ dmu_objset_byteswap, "objset" },
|
||||
{ zfs_znode_byteswap, "znode" },
|
||||
{ zfs_oldacl_byteswap, "oldacl" },
|
||||
{ zfs_acl_byteswap, "acl" }
|
||||
};
|
||||
|
||||
int
|
||||
@ -176,7 +190,7 @@ dmu_set_bonustype(dmu_buf_t *db_fake, dmu_object_type_t type, dmu_tx_t *tx)
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
|
||||
if (type > DMU_OT_NUMTYPES) {
|
||||
if (!DMU_OT_IS_VALID(type)) {
|
||||
error = EINVAL;
|
||||
} else if (dn->dn_bonus != db) {
|
||||
error = EINVAL;
|
||||
@ -1503,7 +1517,7 @@ void
|
||||
dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp, zio_prop_t *zp)
|
||||
{
|
||||
dmu_object_type_t type = dn ? dn->dn_type : DMU_OT_OBJSET;
|
||||
boolean_t ismd = (level > 0 || dmu_ot[type].ot_metadata ||
|
||||
boolean_t ismd = (level > 0 || DMU_OT_IS_METADATA(type) ||
|
||||
(wp & WP_SPILL));
|
||||
enum zio_checksum checksum = os->os_checksum;
|
||||
enum zio_compress compress = os->os_compress;
|
||||
|
@ -20,6 +20,9 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
@ -44,50 +47,38 @@
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/zfs_onexit.h>
|
||||
|
||||
/* Set this tunable to TRUE to replace corrupt data with 0x2f5baddb10c */
|
||||
int zfs_send_corrupt_data = B_FALSE;
|
||||
|
||||
static char *dmu_recv_tag = "dmu_recv_tag";
|
||||
|
||||
/*
|
||||
* The list of data whose inclusion in a send stream can be pending from
|
||||
* one call to backup_cb to another. Multiple calls to dump_free() and
|
||||
* dump_freeobjects() can be aggregated into a single DRR_FREE or
|
||||
* DRR_FREEOBJECTS replay record.
|
||||
*/
|
||||
typedef enum {
	PENDING_NONE,		/* no replay record is being held back */
	PENDING_FREE,		/* a DRR_FREE record is built but not yet written */
	PENDING_FREEOBJECTS	/* a DRR_FREEOBJECTS record is built but not yet written */
} pendop_t;
|
||||
|
||||
struct backuparg {
|
||||
dmu_replay_record_t *drr;
|
||||
vnode_t *vp;
|
||||
offset_t *off;
|
||||
objset_t *os;
|
||||
zio_cksum_t zc;
|
||||
uint64_t toguid;
|
||||
int err;
|
||||
pendop_t pending_op;
|
||||
};
|
||||
|
||||
static int
|
||||
dump_bytes(struct backuparg *ba, void *buf, int len)
|
||||
dump_bytes(dmu_sendarg_t *dsp, void *buf, int len)
|
||||
{
|
||||
dsl_dataset_t *ds = dsp->dsa_os->os_dsl_dataset;
|
||||
ssize_t resid; /* have to get resid to get detailed errno */
|
||||
ASSERT3U(len % 8, ==, 0);
|
||||
|
||||
fletcher_4_incremental_native(buf, len, &ba->zc);
|
||||
ba->err = vn_rdwr(UIO_WRITE, ba->vp,
|
||||
fletcher_4_incremental_native(buf, len, &dsp->dsa_zc);
|
||||
dsp->dsa_err = vn_rdwr(UIO_WRITE, dsp->dsa_vp,
|
||||
(caddr_t)buf, len,
|
||||
0, UIO_SYSSPACE, FAPPEND, RLIM64_INFINITY, CRED(), &resid);
|
||||
*ba->off += len;
|
||||
return (ba->err);
|
||||
|
||||
mutex_enter(&ds->ds_sendstream_lock);
|
||||
*dsp->dsa_off += len;
|
||||
mutex_exit(&ds->ds_sendstream_lock);
|
||||
|
||||
return (dsp->dsa_err);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
|
||||
dump_free(dmu_sendarg_t *dsp, uint64_t object, uint64_t offset,
|
||||
uint64_t length)
|
||||
{
|
||||
struct drr_free *drrf = &(ba->drr->drr_u.drr_free);
|
||||
struct drr_free *drrf = &(dsp->dsa_drr->drr_u.drr_free);
|
||||
|
||||
if (length != -1ULL && offset + length < offset)
|
||||
length = -1ULL;
|
||||
|
||||
/*
|
||||
* If there is a pending op, but it's not PENDING_FREE, push it out,
|
||||
@ -96,13 +87,15 @@ dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
|
||||
* other DRR_FREE records. DRR_FREEOBJECTS records can only be
|
||||
* aggregated with other DRR_FREEOBJECTS records.
|
||||
*/
|
||||
if (ba->pending_op != PENDING_NONE && ba->pending_op != PENDING_FREE) {
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dsp->dsa_pending_op != PENDING_NONE &&
|
||||
dsp->dsa_pending_op != PENDING_FREE) {
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
|
||||
if (ba->pending_op == PENDING_FREE) {
|
||||
if (dsp->dsa_pending_op == PENDING_FREE) {
|
||||
/*
|
||||
* There should never be a PENDING_FREE if length is -1
|
||||
* (because dump_dnode is the only place where this
|
||||
@ -120,34 +113,35 @@ dump_free(struct backuparg *ba, uint64_t object, uint64_t offset,
|
||||
return (0);
|
||||
} else {
|
||||
/* not a continuation. Push out pending record */
|
||||
if (dump_bytes(ba, ba->drr,
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
}
|
||||
/* create a FREE record and make it pending */
|
||||
bzero(ba->drr, sizeof (dmu_replay_record_t));
|
||||
ba->drr->drr_type = DRR_FREE;
|
||||
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
|
||||
dsp->dsa_drr->drr_type = DRR_FREE;
|
||||
drrf->drr_object = object;
|
||||
drrf->drr_offset = offset;
|
||||
drrf->drr_length = length;
|
||||
drrf->drr_toguid = ba->toguid;
|
||||
drrf->drr_toguid = dsp->dsa_toguid;
|
||||
if (length == -1ULL) {
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
} else {
|
||||
ba->pending_op = PENDING_FREE;
|
||||
dsp->dsa_pending_op = PENDING_FREE;
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_data(struct backuparg *ba, dmu_object_type_t type,
|
||||
dump_data(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
uint64_t object, uint64_t offset, int blksz, const blkptr_t *bp, void *data)
|
||||
{
|
||||
struct drr_write *drrw = &(ba->drr->drr_u.drr_write);
|
||||
struct drr_write *drrw = &(dsp->dsa_drr->drr_u.drr_write);
|
||||
|
||||
|
||||
/*
|
||||
@ -156,19 +150,20 @@ dump_data(struct backuparg *ba, dmu_object_type_t type,
|
||||
* the stream, since aggregation can't be done across operations
|
||||
* of different types.
|
||||
*/
|
||||
if (ba->pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dsp->dsa_pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
/* write a DATA record */
|
||||
bzero(ba->drr, sizeof (dmu_replay_record_t));
|
||||
ba->drr->drr_type = DRR_WRITE;
|
||||
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
|
||||
dsp->dsa_drr->drr_type = DRR_WRITE;
|
||||
drrw->drr_object = object;
|
||||
drrw->drr_type = type;
|
||||
drrw->drr_offset = offset;
|
||||
drrw->drr_length = blksz;
|
||||
drrw->drr_toguid = ba->toguid;
|
||||
drrw->drr_toguid = dsp->dsa_toguid;
|
||||
drrw->drr_checksumtype = BP_GET_CHECKSUM(bp);
|
||||
if (zio_checksum_table[drrw->drr_checksumtype].ci_dedup)
|
||||
drrw->drr_checksumflags |= DRR_CHECKSUM_DEDUP;
|
||||
@ -177,42 +172,43 @@ dump_data(struct backuparg *ba, dmu_object_type_t type,
|
||||
DDK_SET_COMPRESS(&drrw->drr_key, BP_GET_COMPRESS(bp));
|
||||
drrw->drr_key.ddk_cksum = bp->blk_cksum;
|
||||
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
if (dump_bytes(ba, data, blksz) != 0)
|
||||
if (dump_bytes(dsp, data, blksz) != 0)
|
||||
return (EINTR);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_spill(struct backuparg *ba, uint64_t object, int blksz, void *data)
|
||||
dump_spill(dmu_sendarg_t *dsp, uint64_t object, int blksz, void *data)
|
||||
{
|
||||
struct drr_spill *drrs = &(ba->drr->drr_u.drr_spill);
|
||||
struct drr_spill *drrs = &(dsp->dsa_drr->drr_u.drr_spill);
|
||||
|
||||
if (ba->pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dsp->dsa_pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
|
||||
/* write a SPILL record */
|
||||
bzero(ba->drr, sizeof (dmu_replay_record_t));
|
||||
ba->drr->drr_type = DRR_SPILL;
|
||||
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
|
||||
dsp->dsa_drr->drr_type = DRR_SPILL;
|
||||
drrs->drr_object = object;
|
||||
drrs->drr_length = blksz;
|
||||
drrs->drr_toguid = ba->toguid;
|
||||
drrs->drr_toguid = dsp->dsa_toguid;
|
||||
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)))
|
||||
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)))
|
||||
return (EINTR);
|
||||
if (dump_bytes(ba, data, blksz))
|
||||
if (dump_bytes(dsp, data, blksz))
|
||||
return (EINTR);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
|
||||
dump_freeobjects(dmu_sendarg_t *dsp, uint64_t firstobj, uint64_t numobjs)
|
||||
{
|
||||
struct drr_freeobjects *drrfo = &(ba->drr->drr_u.drr_freeobjects);
|
||||
struct drr_freeobjects *drrfo = &(dsp->dsa_drr->drr_u.drr_freeobjects);
|
||||
|
||||
/*
|
||||
* If there is a pending op, but it's not PENDING_FREEOBJECTS,
|
||||
@ -221,13 +217,14 @@ dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
|
||||
* aggregated with other DRR_FREE records. DRR_FREEOBJECTS records
|
||||
* can only be aggregated with other DRR_FREEOBJECTS records.
|
||||
*/
|
||||
if (ba->pending_op != PENDING_NONE &&
|
||||
ba->pending_op != PENDING_FREEOBJECTS) {
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dsp->dsa_pending_op != PENDING_NONE &&
|
||||
dsp->dsa_pending_op != PENDING_FREEOBJECTS) {
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
if (ba->pending_op == PENDING_FREEOBJECTS) {
|
||||
if (dsp->dsa_pending_op == PENDING_FREEOBJECTS) {
|
||||
/*
|
||||
* See whether this free object array can be aggregated
|
||||
* with pending one
|
||||
@ -237,42 +234,43 @@ dump_freeobjects(struct backuparg *ba, uint64_t firstobj, uint64_t numobjs)
|
||||
return (0);
|
||||
} else {
|
||||
/* can't be aggregated. Push out pending record */
|
||||
if (dump_bytes(ba, ba->drr,
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
}
|
||||
|
||||
/* write a FREEOBJECTS record */
|
||||
bzero(ba->drr, sizeof (dmu_replay_record_t));
|
||||
ba->drr->drr_type = DRR_FREEOBJECTS;
|
||||
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
|
||||
dsp->dsa_drr->drr_type = DRR_FREEOBJECTS;
|
||||
drrfo->drr_firstobj = firstobj;
|
||||
drrfo->drr_numobjs = numobjs;
|
||||
drrfo->drr_toguid = ba->toguid;
|
||||
drrfo->drr_toguid = dsp->dsa_toguid;
|
||||
|
||||
ba->pending_op = PENDING_FREEOBJECTS;
|
||||
dsp->dsa_pending_op = PENDING_FREEOBJECTS;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
|
||||
dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
|
||||
{
|
||||
struct drr_object *drro = &(ba->drr->drr_u.drr_object);
|
||||
struct drr_object *drro = &(dsp->dsa_drr->drr_u.drr_object);
|
||||
|
||||
if (dnp == NULL || dnp->dn_type == DMU_OT_NONE)
|
||||
return (dump_freeobjects(ba, object, 1));
|
||||
return (dump_freeobjects(dsp, object, 1));
|
||||
|
||||
if (ba->pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dsp->dsa_pending_op != PENDING_NONE) {
|
||||
if (dump_bytes(dsp, dsp->dsa_drr,
|
||||
sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
ba->pending_op = PENDING_NONE;
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
}
|
||||
|
||||
/* write an OBJECT record */
|
||||
bzero(ba->drr, sizeof (dmu_replay_record_t));
|
||||
ba->drr->drr_type = DRR_OBJECT;
|
||||
bzero(dsp->dsa_drr, sizeof (dmu_replay_record_t));
|
||||
dsp->dsa_drr->drr_type = DRR_OBJECT;
|
||||
drro->drr_object = object;
|
||||
drro->drr_type = dnp->dn_type;
|
||||
drro->drr_bonustype = dnp->dn_bonustype;
|
||||
@ -280,19 +278,19 @@ dump_dnode(struct backuparg *ba, uint64_t object, dnode_phys_t *dnp)
|
||||
drro->drr_bonuslen = dnp->dn_bonuslen;
|
||||
drro->drr_checksumtype = dnp->dn_checksum;
|
||||
drro->drr_compress = dnp->dn_compress;
|
||||
drro->drr_toguid = ba->toguid;
|
||||
drro->drr_toguid = dsp->dsa_toguid;
|
||||
|
||||
if (dump_bytes(ba, ba->drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
return (EINTR);
|
||||
|
||||
if (dump_bytes(ba, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
|
||||
if (dump_bytes(dsp, DN_BONUS(dnp), P2ROUNDUP(dnp->dn_bonuslen, 8)) != 0)
|
||||
return (EINTR);
|
||||
|
||||
/* free anything past the end of the file */
|
||||
if (dump_free(ba, object, (dnp->dn_maxblkid + 1) *
|
||||
if (dump_free(dsp, object, (dnp->dn_maxblkid + 1) *
|
||||
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT), -1ULL))
|
||||
return (EINTR);
|
||||
if (ba->err)
|
||||
if (dsp->dsa_err)
|
||||
return (EINTR);
|
||||
return (0);
|
||||
}
|
||||
@ -306,7 +304,7 @@ static int
|
||||
backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
const zbookmark_t *zb, const dnode_phys_t *dnp, void *arg)
|
||||
{
|
||||
struct backuparg *ba = arg;
|
||||
dmu_sendarg_t *dsp = arg;
|
||||
dmu_object_type_t type = bp ? BP_GET_TYPE(bp) : DMU_OT_NONE;
|
||||
int err = 0;
|
||||
|
||||
@ -319,10 +317,10 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
} else if (bp == NULL && zb->zb_object == DMU_META_DNODE_OBJECT) {
|
||||
uint64_t span = BP_SPAN(dnp, zb->zb_level);
|
||||
uint64_t dnobj = (zb->zb_blkid * span) >> DNODE_SHIFT;
|
||||
err = dump_freeobjects(ba, dnobj, span >> DNODE_SHIFT);
|
||||
err = dump_freeobjects(dsp, dnobj, span >> DNODE_SHIFT);
|
||||
} else if (bp == NULL) {
|
||||
uint64_t span = BP_SPAN(dnp, zb->zb_level);
|
||||
err = dump_free(ba, zb->zb_object, zb->zb_blkid * span, span);
|
||||
err = dump_free(dsp, zb->zb_object, zb->zb_blkid * span, span);
|
||||
} else if (zb->zb_level > 0 || type == DMU_OT_OBJSET) {
|
||||
return (0);
|
||||
} else if (type == DMU_OT_DNODE) {
|
||||
@ -341,7 +339,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
for (i = 0; i < blksz >> DNODE_SHIFT; i++) {
|
||||
uint64_t dnobj = (zb->zb_blkid <<
|
||||
(DNODE_BLOCK_SHIFT - DNODE_SHIFT)) + i;
|
||||
err = dump_dnode(ba, dnobj, blk+i);
|
||||
err = dump_dnode(dsp, dnobj, blk+i);
|
||||
if (err)
|
||||
break;
|
||||
}
|
||||
@ -356,7 +354,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
|
||||
return (EIO);
|
||||
|
||||
err = dump_spill(ba, zb->zb_object, blksz, abuf->b_data);
|
||||
err = dump_spill(dsp, zb->zb_object, blksz, abuf->b_data);
|
||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||
} else { /* it's a level-0 block of a regular object */
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
@ -365,10 +363,22 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
|
||||
if (dsl_read(NULL, spa, bp, pbuf,
|
||||
arc_getbuf_func, &abuf, ZIO_PRIORITY_ASYNC_READ,
|
||||
ZIO_FLAG_CANFAIL, &aflags, zb) != 0)
|
||||
return (EIO);
|
||||
ZIO_FLAG_CANFAIL, &aflags, zb) != 0) {
|
||||
if (zfs_send_corrupt_data) {
|
||||
/* Send a block filled with 0x"zfs badd bloc" */
|
||||
abuf = arc_buf_alloc(spa, blksz, &abuf,
|
||||
ARC_BUFC_DATA);
|
||||
uint64_t *ptr;
|
||||
for (ptr = abuf->b_data;
|
||||
(char *)ptr < (char *)abuf->b_data + blksz;
|
||||
ptr++)
|
||||
*ptr = 0x2f5baddb10c;
|
||||
} else {
|
||||
return (EIO);
|
||||
}
|
||||
}
|
||||
|
||||
err = dump_data(ba, type, zb->zb_object, zb->zb_blkid * blksz,
|
||||
err = dump_data(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
|
||||
blksz, bp, abuf->b_data);
|
||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||
}
|
||||
@ -378,13 +388,13 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp, arc_buf_t *pbuf,
|
||||
}
|
||||
|
||||
int
|
||||
dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
vnode_t *vp, offset_t *off)
|
||||
dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
dsl_dataset_t *ds = tosnap->os_dsl_dataset;
|
||||
dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
|
||||
dmu_replay_record_t *drr;
|
||||
struct backuparg ba;
|
||||
dmu_sendarg_t *dsp;
|
||||
int err;
|
||||
uint64_t fromtxg = 0;
|
||||
|
||||
@ -425,8 +435,10 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
#ifdef _KERNEL
|
||||
if (dmu_objset_type(tosnap) == DMU_OST_ZFS) {
|
||||
uint64_t version;
|
||||
if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0)
|
||||
if (zfs_get_zplprop(tosnap, ZFS_PROP_VERSION, &version) != 0) {
|
||||
kmem_free(drr, sizeof (dmu_replay_record_t));
|
||||
return (EINVAL);
|
||||
}
|
||||
if (version == ZPL_VERSION_SA) {
|
||||
DMU_SET_FEATUREFLAGS(
|
||||
drr->drr_u.drr_begin.drr_versioninfo,
|
||||
@ -453,44 +465,137 @@ dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
if (fromorigin)
|
||||
dsl_dataset_rele(fromds, FTAG);
|
||||
|
||||
ba.drr = drr;
|
||||
ba.vp = vp;
|
||||
ba.os = tosnap;
|
||||
ba.off = off;
|
||||
ba.toguid = ds->ds_phys->ds_guid;
|
||||
ZIO_SET_CHECKSUM(&ba.zc, 0, 0, 0, 0);
|
||||
ba.pending_op = PENDING_NONE;
|
||||
dsp = kmem_zalloc(sizeof (dmu_sendarg_t), KM_SLEEP);
|
||||
|
||||
if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) {
|
||||
kmem_free(drr, sizeof (dmu_replay_record_t));
|
||||
return (ba.err);
|
||||
dsp->dsa_drr = drr;
|
||||
dsp->dsa_vp = vp;
|
||||
dsp->dsa_outfd = outfd;
|
||||
dsp->dsa_proc = curproc;
|
||||
dsp->dsa_os = tosnap;
|
||||
dsp->dsa_off = off;
|
||||
dsp->dsa_toguid = ds->ds_phys->ds_guid;
|
||||
ZIO_SET_CHECKSUM(&dsp->dsa_zc, 0, 0, 0, 0);
|
||||
dsp->dsa_pending_op = PENDING_NONE;
|
||||
|
||||
mutex_enter(&ds->ds_sendstream_lock);
|
||||
list_insert_head(&ds->ds_sendstreams, dsp);
|
||||
mutex_exit(&ds->ds_sendstream_lock);
|
||||
|
||||
if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
|
||||
err = dsp->dsa_err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = traverse_dataset(ds, fromtxg, TRAVERSE_PRE | TRAVERSE_PREFETCH,
|
||||
backup_cb, &ba);
|
||||
backup_cb, dsp);
|
||||
|
||||
if (ba.pending_op != PENDING_NONE)
|
||||
if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
if (dsp->dsa_pending_op != PENDING_NONE)
|
||||
if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
err = EINTR;
|
||||
|
||||
if (err) {
|
||||
if (err == EINTR && ba.err)
|
||||
err = ba.err;
|
||||
kmem_free(drr, sizeof (dmu_replay_record_t));
|
||||
return (err);
|
||||
if (err == EINTR && dsp->dsa_err)
|
||||
err = dsp->dsa_err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
bzero(drr, sizeof (dmu_replay_record_t));
|
||||
drr->drr_type = DRR_END;
|
||||
drr->drr_u.drr_end.drr_checksum = ba.zc;
|
||||
drr->drr_u.drr_end.drr_toguid = ba.toguid;
|
||||
drr->drr_u.drr_end.drr_checksum = dsp->dsa_zc;
|
||||
drr->drr_u.drr_end.drr_toguid = dsp->dsa_toguid;
|
||||
|
||||
if (dump_bytes(&ba, drr, sizeof (dmu_replay_record_t)) != 0) {
|
||||
kmem_free(drr, sizeof (dmu_replay_record_t));
|
||||
return (ba.err);
|
||||
if (dump_bytes(dsp, drr, sizeof (dmu_replay_record_t)) != 0) {
|
||||
err = dsp->dsa_err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
mutex_enter(&ds->ds_sendstream_lock);
|
||||
list_remove(&ds->ds_sendstreams, dsp);
|
||||
mutex_exit(&ds->ds_sendstream_lock);
|
||||
|
||||
kmem_free(drr, sizeof (dmu_replay_record_t));
|
||||
kmem_free(dsp, sizeof (dmu_sendarg_t));
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
uint64_t *sizep)
|
||||
{
|
||||
dsl_dataset_t *ds = tosnap->os_dsl_dataset;
|
||||
dsl_dataset_t *fromds = fromsnap ? fromsnap->os_dsl_dataset : NULL;
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
int err;
|
||||
uint64_t size;
|
||||
|
||||
/* tosnap must be a snapshot */
|
||||
if (ds->ds_phys->ds_next_snap_obj == 0)
|
||||
return (EINVAL);
|
||||
|
||||
/* fromsnap must be an earlier snapshot from the same fs as tosnap */
|
||||
if (fromds && (ds->ds_dir != fromds->ds_dir ||
|
||||
fromds->ds_phys->ds_creation_txg >= ds->ds_phys->ds_creation_txg))
|
||||
return (EXDEV);
|
||||
|
||||
if (fromorigin) {
|
||||
if (fromsnap)
|
||||
return (EINVAL);
|
||||
|
||||
if (dsl_dir_is_clone(ds->ds_dir)) {
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
err = dsl_dataset_hold_obj(dp,
|
||||
ds->ds_dir->dd_phys->dd_origin_obj, FTAG, &fromds);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
if (err)
|
||||
return (err);
|
||||
} else {
|
||||
fromorigin = B_FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get uncompressed size estimate of changed data. */
|
||||
if (fromds == NULL) {
|
||||
size = ds->ds_phys->ds_uncompressed_bytes;
|
||||
} else {
|
||||
uint64_t used, comp;
|
||||
err = dsl_dataset_space_written(fromds, ds,
|
||||
&used, &comp, &size);
|
||||
if (fromorigin)
|
||||
dsl_dataset_rele(fromds, FTAG);
|
||||
if (err)
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Assume that space (both on-disk and in-stream) is dominated by
|
||||
* data. We will adjust for indirect blocks and the copies property,
|
||||
* but ignore per-object space used (eg, dnodes and DRR_OBJECT records).
|
||||
*/
|
||||
|
||||
/*
|
||||
* Subtract out approximate space used by indirect blocks.
|
||||
* Assume most space is used by data blocks (non-indirect, non-dnode).
|
||||
* Assume all blocks are recordsize. Assume ditto blocks and
|
||||
* internal fragmentation counter out compression.
|
||||
*
|
||||
* Therefore, space used by indirect blocks is sizeof(blkptr_t) per
|
||||
* block, which we observe in practice.
|
||||
*/
|
||||
uint64_t recordsize;
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
err = dsl_prop_get_ds(ds, "recordsize",
|
||||
sizeof (recordsize), 1, &recordsize, NULL);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
if (err)
|
||||
return (err);
|
||||
size -= size / recordsize * sizeof (blkptr_t);
|
||||
|
||||
/* Add in the space for the record associated with each block. */
|
||||
size += size / recordsize * sizeof (dmu_replay_record_t);
|
||||
|
||||
*sizep = size;
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -833,61 +938,6 @@ guid_compare(const void *arg1, const void *arg2)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is a callback used by dmu_objset_find() (which
|
||||
* enumerates the object sets) to build an avl tree that maps guids
|
||||
* to datasets. The resulting table is used when processing DRR_WRITE_BYREF
|
||||
* send stream records. These records, which are used in dedup'ed
|
||||
* streams, do not contain data themselves, but refer to a copy
|
||||
* of the data block that has already been written because it was
|
||||
* earlier in the stream. That previous copy is identified by the
|
||||
* guid of the dataset with the referenced data.
|
||||
*/
|
||||
int
|
||||
find_ds_by_guid(const char *name, void *arg)
|
||||
{
|
||||
avl_tree_t *guid_map = arg;
|
||||
dsl_dataset_t *ds, *snapds;
|
||||
guid_map_entry_t *gmep;
|
||||
dsl_pool_t *dp;
|
||||
int err;
|
||||
uint64_t lastobj, firstobj;
|
||||
|
||||
if (dsl_dataset_hold(name, FTAG, &ds) != 0)
|
||||
return (0);
|
||||
|
||||
dp = ds->ds_dir->dd_pool;
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
firstobj = ds->ds_dir->dd_phys->dd_origin_obj;
|
||||
lastobj = ds->ds_phys->ds_prev_snap_obj;
|
||||
|
||||
while (lastobj != firstobj) {
|
||||
err = dsl_dataset_hold_obj(dp, lastobj, guid_map, &snapds);
|
||||
if (err) {
|
||||
/*
|
||||
* Skip this snapshot and move on. It's not
|
||||
* clear why this would ever happen, but the
|
||||
* remainder of the snapshot streadm can be
|
||||
* processed.
|
||||
*/
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
|
||||
gmep->guid = snapds->ds_phys->ds_guid;
|
||||
gmep->gme_ds = snapds;
|
||||
avl_add(guid_map, gmep);
|
||||
lastobj = snapds->ds_phys->ds_prev_snap_obj;
|
||||
}
|
||||
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
free_guid_map_onexit(void *arg)
|
||||
{
|
||||
@ -1025,8 +1075,8 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
|
||||
void *data = NULL;
|
||||
|
||||
if (drro->drr_type == DMU_OT_NONE ||
|
||||
drro->drr_type >= DMU_OT_NUMTYPES ||
|
||||
drro->drr_bonustype >= DMU_OT_NUMTYPES ||
|
||||
!DMU_OT_IS_VALID(drro->drr_type) ||
|
||||
!DMU_OT_IS_VALID(drro->drr_bonustype) ||
|
||||
drro->drr_checksumtype >= ZIO_CHECKSUM_FUNCTIONS ||
|
||||
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
|
||||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
|
||||
@ -1091,7 +1141,9 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
|
||||
ASSERT3U(db->db_size, >=, drro->drr_bonuslen);
|
||||
bcopy(data, db->db_data, drro->drr_bonuslen);
|
||||
if (ra->byteswap) {
|
||||
dmu_ot[drro->drr_bonustype].ot_byteswap(db->db_data,
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(drro->drr_bonustype);
|
||||
dmu_ot_byteswap[byteswap].ob_func(db->db_data,
|
||||
drro->drr_bonuslen);
|
||||
}
|
||||
dmu_buf_rele(db, FTAG);
|
||||
@ -1134,7 +1186,7 @@ restore_write(struct restorearg *ra, objset_t *os,
|
||||
int err;
|
||||
|
||||
if (drrw->drr_offset + drrw->drr_length < drrw->drr_offset ||
|
||||
drrw->drr_type >= DMU_OT_NUMTYPES)
|
||||
!DMU_OT_IS_VALID(drrw->drr_type))
|
||||
return (EINVAL);
|
||||
|
||||
data = restore_read(ra, drrw->drr_length);
|
||||
@ -1153,8 +1205,11 @@ restore_write(struct restorearg *ra, objset_t *os,
|
||||
dmu_tx_abort(tx);
|
||||
return (err);
|
||||
}
|
||||
if (ra->byteswap)
|
||||
dmu_ot[drrw->drr_type].ot_byteswap(data, drrw->drr_length);
|
||||
if (ra->byteswap) {
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(drrw->drr_type);
|
||||
dmu_ot_byteswap[byteswap].ob_func(data, drrw->drr_length);
|
||||
}
|
||||
dmu_write(os, drrw->drr_object,
|
||||
drrw->drr_offset, drrw->drr_length, data, tx);
|
||||
dmu_tx_commit(tx);
|
||||
@ -1370,9 +1425,6 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
||||
avl_create(ra.guid_to_ds_map, guid_compare,
|
||||
sizeof (guid_map_entry_t),
|
||||
offsetof(guid_map_entry_t, avlnode));
|
||||
(void) dmu_objset_find(drc->drc_top_ds, find_ds_by_guid,
|
||||
(void *)ra.guid_to_ds_map,
|
||||
DS_FIND_CHILDREN);
|
||||
ra.err = zfs_onexit_add_cb(minor,
|
||||
free_guid_map_onexit, ra.guid_to_ds_map,
|
||||
action_handlep);
|
||||
@ -1384,6 +1436,8 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
||||
if (ra.err)
|
||||
goto out;
|
||||
}
|
||||
|
||||
drc->drc_guid_to_ds_map = ra.guid_to_ds_map;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1521,12 +1575,36 @@ recv_end_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
ds->ds_phys->ds_flags &= ~DS_FLAG_INCONSISTENT;
|
||||
}
|
||||
|
||||
static int
|
||||
add_ds_to_guidmap(avl_tree_t *guid_map, dsl_dataset_t *ds)
|
||||
{
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
uint64_t snapobj = ds->ds_phys->ds_prev_snap_obj;
|
||||
dsl_dataset_t *snapds;
|
||||
guid_map_entry_t *gmep;
|
||||
int err;
|
||||
|
||||
ASSERT(guid_map != NULL);
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
err = dsl_dataset_hold_obj(dp, snapobj, guid_map, &snapds);
|
||||
if (err == 0) {
|
||||
gmep = kmem_alloc(sizeof (guid_map_entry_t), KM_SLEEP);
|
||||
gmep->guid = snapds->ds_phys->ds_guid;
|
||||
gmep->gme_ds = snapds;
|
||||
avl_add(guid_map, gmep);
|
||||
}
|
||||
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
static int
|
||||
dmu_recv_existing_end(dmu_recv_cookie_t *drc)
|
||||
{
|
||||
struct recvendsyncarg resa;
|
||||
dsl_dataset_t *ds = drc->drc_logical_ds;
|
||||
int err;
|
||||
int err, myerr;
|
||||
|
||||
/*
|
||||
* XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
|
||||
@ -1561,8 +1639,11 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
|
||||
|
||||
out:
|
||||
mutex_exit(&ds->ds_recvlock);
|
||||
if (err == 0 && drc->drc_guid_to_ds_map != NULL)
|
||||
(void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
|
||||
dsl_dataset_disown(ds, dmu_recv_tag);
|
||||
(void) dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
|
||||
myerr = dsl_dataset_destroy(drc->drc_real_ds, dmu_recv_tag, B_FALSE);
|
||||
ASSERT3U(myerr, ==, 0);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1590,6 +1671,8 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
|
||||
/* clean up the fs we just recv'd into */
|
||||
(void) dsl_dataset_destroy(ds, dmu_recv_tag, B_FALSE);
|
||||
} else {
|
||||
if (drc->drc_guid_to_ds_map != NULL)
|
||||
(void) add_ds_to_guidmap(drc->drc_guid_to_ds_map, ds);
|
||||
/* release the hold from dmu_recv_begin */
|
||||
dsl_dataset_disown(ds, dmu_recv_tag);
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -53,6 +54,7 @@ typedef struct traverse_data {
|
||||
uint64_t td_objset;
|
||||
blkptr_t *td_rootbp;
|
||||
uint64_t td_min_txg;
|
||||
zbookmark_t *td_resume;
|
||||
int td_flags;
|
||||
prefetch_data_t *td_pfd;
|
||||
blkptr_cb_t *td_func;
|
||||
@ -128,6 +130,54 @@ traverse_zil(traverse_data_t *td, zil_header_t *zh)
|
||||
zil_free(zilog);
|
||||
}
|
||||
|
||||
typedef enum resume_skip {
|
||||
RESUME_SKIP_ALL,
|
||||
RESUME_SKIP_NONE,
|
||||
RESUME_SKIP_CHILDREN
|
||||
} resume_skip_t;
|
||||
|
||||
/*
|
||||
* Returns RESUME_SKIP_ALL if td indicates that we are resuming a traversal and
|
||||
* the block indicated by zb does not need to be visited at all. Returns
|
||||
* RESUME_SKIP_CHILDREN if we are resuming a post traversal and we reach the
|
||||
* resume point. This indicates that this block should be visited but not its
|
||||
* children (since they must have been visited in a previous traversal).
|
||||
* Otherwise returns RESUME_SKIP_NONE.
|
||||
*/
|
||||
static resume_skip_t
|
||||
resume_skip_check(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
const zbookmark_t *zb)
|
||||
{
|
||||
if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) {
|
||||
/*
|
||||
* If we already visited this bp & everything below,
|
||||
* don't bother doing it again.
|
||||
*/
|
||||
if (zbookmark_is_before(dnp, zb, td->td_resume))
|
||||
return (RESUME_SKIP_ALL);
|
||||
|
||||
/*
|
||||
* If we found the block we're trying to resume from, zero
|
||||
* the bookmark out to indicate that we have resumed.
|
||||
*/
|
||||
ASSERT3U(zb->zb_object, <=, td->td_resume->zb_object);
|
||||
if (bcmp(zb, td->td_resume, sizeof (*zb)) == 0) {
|
||||
bzero(td->td_resume, sizeof (*zb));
|
||||
if (td->td_flags & TRAVERSE_POST)
|
||||
return (RESUME_SKIP_CHILDREN);
|
||||
}
|
||||
}
|
||||
return (RESUME_SKIP_NONE);
|
||||
}
|
||||
|
||||
static void
|
||||
traverse_pause(traverse_data_t *td, const zbookmark_t *zb)
|
||||
{
|
||||
ASSERT(td->td_resume != NULL);
|
||||
ASSERT3U(zb->zb_level, ==, 0);
|
||||
bcopy(zb, td->td_resume, sizeof (*td->td_resume));
|
||||
}
|
||||
|
||||
static int
|
||||
traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb)
|
||||
@ -137,8 +187,20 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
arc_buf_t *buf = NULL;
|
||||
prefetch_data_t *pd = td->td_pfd;
|
||||
boolean_t hard = td->td_flags & TRAVERSE_HARD;
|
||||
boolean_t pause = B_FALSE;
|
||||
|
||||
if (bp->blk_birth == 0) {
|
||||
switch (resume_skip_check(td, dnp, zb)) {
|
||||
case RESUME_SKIP_ALL:
|
||||
return (0);
|
||||
case RESUME_SKIP_CHILDREN:
|
||||
goto post;
|
||||
case RESUME_SKIP_NONE:
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
err = td->td_func(td->td_spa, NULL, NULL, pbuf, zb, dnp,
|
||||
td->td_arg);
|
||||
return (err);
|
||||
@ -164,8 +226,10 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
td->td_arg);
|
||||
if (err == TRAVERSE_VISIT_NO_CHILDREN)
|
||||
return (0);
|
||||
if (err)
|
||||
return (err);
|
||||
if (err == ERESTART)
|
||||
pause = B_TRUE; /* handle pausing at a common point */
|
||||
if (err != 0)
|
||||
goto post;
|
||||
}
|
||||
|
||||
if (BP_GET_LEVEL(bp) > 0) {
|
||||
@ -253,9 +317,18 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
|
||||
if (buf)
|
||||
(void) arc_buf_remove_ref(buf, &buf);
|
||||
|
||||
post:
|
||||
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
|
||||
err = td->td_func(td->td_spa, NULL, bp, pbuf, zb, dnp,
|
||||
td->td_arg);
|
||||
if (err == ERESTART)
|
||||
pause = B_TRUE;
|
||||
}
|
||||
|
||||
if (pause && td->td_resume != NULL) {
|
||||
ASSERT3U(err, ==, ERESTART);
|
||||
ASSERT(!hard);
|
||||
traverse_pause(td, zb);
|
||||
}
|
||||
|
||||
return (err != 0 ? err : lasterr);
|
||||
@ -353,18 +426,23 @@ traverse_prefetch_thread(void *arg)
|
||||
* in syncing context).
|
||||
*/
|
||||
static int
|
||||
traverse_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *rootbp,
|
||||
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg)
|
||||
traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,
|
||||
uint64_t txg_start, zbookmark_t *resume, int flags,
|
||||
blkptr_cb_t func, void *arg)
|
||||
{
|
||||
traverse_data_t td;
|
||||
prefetch_data_t pd = { 0 };
|
||||
zbookmark_t czb;
|
||||
int err;
|
||||
|
||||
ASSERT(ds == NULL || objset == ds->ds_object);
|
||||
ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST));
|
||||
|
||||
td.td_spa = spa;
|
||||
td.td_objset = ds ? ds->ds_object : 0;
|
||||
td.td_objset = objset;
|
||||
td.td_rootbp = rootbp;
|
||||
td.td_min_txg = txg_start;
|
||||
td.td_resume = resume;
|
||||
td.td_func = func;
|
||||
td.td_arg = arg;
|
||||
td.td_pfd = &pd;
|
||||
@ -416,8 +494,17 @@ int
|
||||
traverse_dataset(dsl_dataset_t *ds, uint64_t txg_start, int flags,
|
||||
blkptr_cb_t func, void *arg)
|
||||
{
|
||||
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds,
|
||||
&ds->ds_phys->ds_bp, txg_start, flags, func, arg));
|
||||
return (traverse_impl(ds->ds_dir->dd_pool->dp_spa, ds, ds->ds_object,
|
||||
&ds->ds_phys->ds_bp, txg_start, NULL, flags, func, arg));
|
||||
}
|
||||
|
||||
int
|
||||
traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
|
||||
uint64_t txg_start, zbookmark_t *resume, int flags,
|
||||
blkptr_cb_t func, void *arg)
|
||||
{
|
||||
return (traverse_impl(spa, NULL, ZB_DESTROYED_OBJSET,
|
||||
blkptr, txg_start, resume, flags, func, arg));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -434,8 +521,8 @@ traverse_pool(spa_t *spa, uint64_t txg_start, int flags,
|
||||
boolean_t hard = (flags & TRAVERSE_HARD);
|
||||
|
||||
/* visit the MOS */
|
||||
err = traverse_impl(spa, NULL, spa_get_rootblkptr(spa),
|
||||
txg_start, flags, func, arg);
|
||||
err = traverse_impl(spa, NULL, 0, spa_get_rootblkptr(spa),
|
||||
txg_start, NULL, flags, func, arg);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu.h>
|
||||
@ -673,9 +675,11 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
return;
|
||||
}
|
||||
|
||||
ASSERT3P(dmu_ot[dn->dn_type].ot_byteswap, ==, zap_byteswap);
|
||||
ASSERT3P(DMU_OT_BYTESWAP(dn->dn_type), ==, DMU_BSWAP_ZAP);
|
||||
|
||||
if (dn->dn_maxblkid == 0 && !add) {
|
||||
blkptr_t *bp;
|
||||
|
||||
/*
|
||||
* If there is only one block (i.e. this is a micro-zap)
|
||||
* and we are not adding anything, the accounting is simple.
|
||||
@ -690,14 +694,13 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
* Use max block size here, since we don't know how much
|
||||
* the size will change between now and the dbuf dirty call.
|
||||
*/
|
||||
bp = &dn->dn_phys->dn_blkptr[0];
|
||||
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
&dn->dn_phys->dn_blkptr[0],
|
||||
dn->dn_phys->dn_blkptr[0].blk_birth)) {
|
||||
bp, bp->blk_birth))
|
||||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
} else {
|
||||
else
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
}
|
||||
if (dn->dn_phys->dn_blkptr[0].blk_birth)
|
||||
if (!BP_IS_HOLE(bp))
|
||||
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
|
||||
return;
|
||||
}
|
||||
@ -1273,7 +1276,6 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
|
||||
{
|
||||
dnode_t *dn;
|
||||
dmu_tx_hold_t *txh;
|
||||
blkptr_t *bp;
|
||||
|
||||
txh = dmu_tx_hold_object_impl(tx, tx->tx_objset, object,
|
||||
THT_SPILL, 0, 0);
|
||||
@ -1284,17 +1286,18 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
|
||||
return;
|
||||
|
||||
/* If blkptr doesn't exist then add space to towrite */
|
||||
bp = &dn->dn_phys->dn_spill;
|
||||
if (BP_IS_HOLE(bp)) {
|
||||
if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tounref = 0;
|
||||
} else {
|
||||
blkptr_t *bp;
|
||||
|
||||
bp = &dn->dn_phys->dn_spill;
|
||||
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
bp, bp->blk_birth))
|
||||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
if (bp->blk_birth)
|
||||
if (!BP_IS_HOLE(bp))
|
||||
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
|
||||
}
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -193,7 +194,7 @@ dnode_verify(dnode_t *dn)
|
||||
ASSERT(dn->dn_objset);
|
||||
ASSERT(dn->dn_handle->dnh_dnode == dn);
|
||||
|
||||
ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
|
||||
|
||||
if (!(zfs_flags & ZFS_DEBUG_DNODE_VERIFY))
|
||||
return;
|
||||
@ -212,7 +213,7 @@ dnode_verify(dnode_t *dn)
|
||||
ASSERT3U(1<<dn->dn_datablkshift, ==, dn->dn_datablksz);
|
||||
}
|
||||
ASSERT3U(dn->dn_nlevels, <=, 30);
|
||||
ASSERT3U(dn->dn_type, <=, DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(dn->dn_type));
|
||||
ASSERT3U(dn->dn_nblkptr, >=, 1);
|
||||
ASSERT3U(dn->dn_nblkptr, <=, DN_MAX_NBLKPTR);
|
||||
ASSERT3U(dn->dn_bonuslen, <=, DN_MAX_BONUSLEN);
|
||||
@ -278,8 +279,10 @@ dnode_byteswap(dnode_phys_t *dnp)
|
||||
*/
|
||||
int off = (dnp->dn_nblkptr-1) * sizeof (blkptr_t);
|
||||
size_t len = DN_MAX_BONUSLEN - off;
|
||||
ASSERT3U(dnp->dn_bonustype, <, DMU_OT_NUMTYPES);
|
||||
dmu_ot[dnp->dn_bonustype].ot_byteswap(dnp->dn_bonus + off, len);
|
||||
ASSERT(DMU_OT_IS_VALID(dnp->dn_bonustype));
|
||||
dmu_object_byteswap_t byteswap =
|
||||
DMU_OT_BYTESWAP(dnp->dn_bonustype);
|
||||
dmu_ot_byteswap[byteswap].ob_func(dnp->dn_bonus + off, len);
|
||||
}
|
||||
|
||||
/* Swap SPILL block if we have one */
|
||||
@ -407,7 +410,7 @@ dnode_create(objset_t *os, dnode_phys_t *dnp, dmu_buf_impl_t *db,
|
||||
|
||||
dmu_zfetch_init(&dn->dn_zfetch, dn);
|
||||
|
||||
ASSERT(dn->dn_phys->dn_type < DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(dn->dn_phys->dn_type));
|
||||
|
||||
mutex_enter(&os->os_lock);
|
||||
list_insert_head(&os->os_dnodes, dn);
|
||||
@ -496,11 +499,11 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
ASSERT(bcmp(dn->dn_phys, &dnode_phys_zero, sizeof (dnode_phys_t)) == 0);
|
||||
ASSERT(dn->dn_phys->dn_type == DMU_OT_NONE);
|
||||
ASSERT(ot != DMU_OT_NONE);
|
||||
ASSERT3U(ot, <, DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(ot));
|
||||
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
|
||||
(bonustype == DMU_OT_SA && bonuslen == 0) ||
|
||||
(bonustype != DMU_OT_NONE && bonuslen != 0));
|
||||
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(bonustype));
|
||||
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
|
||||
ASSERT(dn->dn_type == DMU_OT_NONE);
|
||||
ASSERT3U(dn->dn_maxblkid, ==, 0);
|
||||
@ -568,7 +571,7 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
ASSERT((bonustype == DMU_OT_NONE && bonuslen == 0) ||
|
||||
(bonustype != DMU_OT_NONE && bonuslen != 0) ||
|
||||
(bonustype == DMU_OT_SA && bonuslen == 0));
|
||||
ASSERT3U(bonustype, <, DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(bonustype));
|
||||
ASSERT3U(bonuslen, <=, DN_MAX_BONUSLEN);
|
||||
|
||||
/* clean up any unreferenced dbufs */
|
||||
|
@ -18,8 +18,10 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -594,7 +596,7 @@ dnode_sync(dnode_t *dn, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
if (dn->dn_next_bonustype[txgoff]) {
|
||||
ASSERT(dn->dn_next_bonustype[txgoff] < DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(dn->dn_next_bonustype[txgoff]));
|
||||
dnp->dn_bonustype = dn->dn_next_bonustype[txgoff];
|
||||
dn->dn_next_bonustype[txgoff] = 0;
|
||||
}
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dmu_objset.h>
|
||||
@ -28,10 +30,12 @@
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_synctask.h>
|
||||
#include <sys/dmu_traverse.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/unique.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
@ -97,7 +101,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
if (BP_IS_HOLE(bp))
|
||||
return;
|
||||
ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
|
||||
ASSERT3U(BP_GET_TYPE(bp), <, DMU_OT_NUMTYPES);
|
||||
ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
|
||||
if (ds == NULL) {
|
||||
/*
|
||||
* Account for the meta-objset space in its placeholder
|
||||
@ -114,7 +118,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
mutex_enter(&ds->ds_dir->dd_lock);
|
||||
mutex_enter(&ds->ds_lock);
|
||||
delta = parent_delta(ds, used);
|
||||
ds->ds_phys->ds_used_bytes += used;
|
||||
ds->ds_phys->ds_referenced_bytes += used;
|
||||
ds->ds_phys->ds_compressed_bytes += compressed;
|
||||
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
|
||||
ds->ds_phys->ds_unique_bytes += used;
|
||||
@ -208,8 +212,8 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,
|
||||
}
|
||||
}
|
||||
mutex_enter(&ds->ds_lock);
|
||||
ASSERT3U(ds->ds_phys->ds_used_bytes, >=, used);
|
||||
ds->ds_phys->ds_used_bytes -= used;
|
||||
ASSERT3U(ds->ds_phys->ds_referenced_bytes, >=, used);
|
||||
ds->ds_phys->ds_referenced_bytes -= used;
|
||||
ASSERT3U(ds->ds_phys->ds_compressed_bytes, >=, compressed);
|
||||
ds->ds_phys->ds_compressed_bytes -= compressed;
|
||||
ASSERT3U(ds->ds_phys->ds_uncompressed_bytes, >=, uncompressed);
|
||||
@ -393,6 +397,8 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_recvlock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
rw_init(&ds->ds_rwlock, 0, 0, 0);
|
||||
cv_init(&ds->ds_exclusive_cv, NULL, CV_DEFAULT, NULL);
|
||||
|
||||
@ -400,6 +406,9 @@ dsl_dataset_get_ref(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
dsl_deadlist_open(&ds->ds_deadlist,
|
||||
mos, ds->ds_phys->ds_deadlist_obj);
|
||||
|
||||
list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
|
||||
offsetof(dmu_sendarg_t, dsa_link));
|
||||
|
||||
if (err == 0) {
|
||||
err = dsl_dir_open_obj(dp,
|
||||
ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
|
||||
@ -810,8 +819,8 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
dsphys->ds_prev_snap_obj = origin->ds_object;
|
||||
dsphys->ds_prev_snap_txg =
|
||||
origin->ds_phys->ds_creation_txg;
|
||||
dsphys->ds_used_bytes =
|
||||
origin->ds_phys->ds_used_bytes;
|
||||
dsphys->ds_referenced_bytes =
|
||||
origin->ds_phys->ds_referenced_bytes;
|
||||
dsphys->ds_compressed_bytes =
|
||||
origin->ds_phys->ds_compressed_bytes;
|
||||
dsphys->ds_uncompressed_bytes =
|
||||
@ -901,69 +910,55 @@ dsl_dataset_create_sync(dsl_dir_t *pdd, const char *lastname,
|
||||
return (dsobj);
|
||||
}
|
||||
|
||||
struct destroyarg {
|
||||
dsl_sync_task_group_t *dstg;
|
||||
char *snapname;
|
||||
char *failed;
|
||||
boolean_t defer;
|
||||
};
|
||||
|
||||
static int
|
||||
dsl_snapshot_destroy_one(const char *name, void *arg)
|
||||
{
|
||||
struct destroyarg *da = arg;
|
||||
dsl_dataset_t *ds;
|
||||
int err;
|
||||
char *dsname;
|
||||
|
||||
dsname = kmem_asprintf("%s@%s", name, da->snapname);
|
||||
err = dsl_dataset_own(dsname, B_TRUE, da->dstg, &ds);
|
||||
strfree(dsname);
|
||||
if (err == 0) {
|
||||
struct dsl_ds_destroyarg *dsda;
|
||||
|
||||
dsl_dataset_make_exclusive(ds, da->dstg);
|
||||
dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg), KM_SLEEP);
|
||||
dsda->ds = ds;
|
||||
dsda->defer = da->defer;
|
||||
dsl_sync_task_create(da->dstg, dsl_dataset_destroy_check,
|
||||
dsl_dataset_destroy_sync, dsda, da->dstg, 0);
|
||||
} else if (err == ENOENT) {
|
||||
err = 0;
|
||||
} else {
|
||||
(void) strcpy(da->failed, name);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Destroy 'snapname' in all descendants of 'fsname'.
|
||||
* The snapshots must all be in the same pool.
|
||||
*/
|
||||
#pragma weak dmu_snapshots_destroy = dsl_snapshots_destroy
|
||||
int
|
||||
dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
|
||||
dmu_snapshots_destroy_nvl(nvlist_t *snaps, boolean_t defer, char *failed)
|
||||
{
|
||||
int err;
|
||||
struct destroyarg da;
|
||||
dsl_sync_task_t *dst;
|
||||
spa_t *spa;
|
||||
nvpair_t *pair;
|
||||
dsl_sync_task_group_t *dstg;
|
||||
|
||||
err = spa_open(fsname, &spa, FTAG);
|
||||
pair = nvlist_next_nvpair(snaps, NULL);
|
||||
if (pair == NULL)
|
||||
return (0);
|
||||
|
||||
err = spa_open(nvpair_name(pair), &spa, FTAG);
|
||||
if (err)
|
||||
return (err);
|
||||
da.dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
||||
da.snapname = snapname;
|
||||
da.failed = fsname;
|
||||
da.defer = defer;
|
||||
dstg = dsl_sync_task_group_create(spa_get_dsl(spa));
|
||||
|
||||
err = dmu_objset_find(fsname,
|
||||
dsl_snapshot_destroy_one, &da, DS_FIND_CHILDREN);
|
||||
for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(snaps, pair)) {
|
||||
dsl_dataset_t *ds;
|
||||
|
||||
err = dsl_dataset_own(nvpair_name(pair), B_TRUE, dstg, &ds);
|
||||
if (err == 0) {
|
||||
struct dsl_ds_destroyarg *dsda;
|
||||
|
||||
dsl_dataset_make_exclusive(ds, dstg);
|
||||
dsda = kmem_zalloc(sizeof (struct dsl_ds_destroyarg),
|
||||
KM_SLEEP);
|
||||
dsda->ds = ds;
|
||||
dsda->defer = defer;
|
||||
dsl_sync_task_create(dstg, dsl_dataset_destroy_check,
|
||||
dsl_dataset_destroy_sync, dsda, dstg, 0);
|
||||
} else if (err == ENOENT) {
|
||||
err = 0;
|
||||
} else {
|
||||
(void) strcpy(failed, nvpair_name(pair));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 0)
|
||||
err = dsl_sync_task_group_wait(da.dstg);
|
||||
err = dsl_sync_task_group_wait(dstg);
|
||||
|
||||
for (dst = list_head(&da.dstg->dstg_tasks); dst;
|
||||
dst = list_next(&da.dstg->dstg_tasks, dst)) {
|
||||
for (dst = list_head(&dstg->dstg_tasks); dst;
|
||||
dst = list_next(&dstg->dstg_tasks, dst)) {
|
||||
struct dsl_ds_destroyarg *dsda = dst->dst_arg1;
|
||||
dsl_dataset_t *ds = dsda->ds;
|
||||
|
||||
@ -971,17 +966,17 @@ dsl_snapshots_destroy(char *fsname, char *snapname, boolean_t defer)
|
||||
* Return the file system name that triggered the error
|
||||
*/
|
||||
if (dst->dst_err) {
|
||||
dsl_dataset_name(ds, fsname);
|
||||
*strchr(fsname, '@') = '\0';
|
||||
dsl_dataset_name(ds, failed);
|
||||
}
|
||||
ASSERT3P(dsda->rm_origin, ==, NULL);
|
||||
dsl_dataset_disown(ds, da.dstg);
|
||||
dsl_dataset_disown(ds, dstg);
|
||||
kmem_free(dsda, sizeof (struct dsl_ds_destroyarg));
|
||||
}
|
||||
|
||||
dsl_sync_task_group_destroy(da.dstg);
|
||||
dsl_sync_task_group_destroy(dstg);
|
||||
spa_close(spa, FTAG);
|
||||
return (err);
|
||||
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
@ -1087,19 +1082,23 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* remove the objects in open context, so that we won't
|
||||
* have too much to do in syncing context.
|
||||
* If async destruction is not enabled try to remove all objects
|
||||
* while in the open context so that there is less work to do in
|
||||
* the syncing context.
|
||||
*/
|
||||
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
|
||||
ds->ds_phys->ds_prev_snap_txg)) {
|
||||
/*
|
||||
* Ignore errors, if there is not enough disk space
|
||||
* we will deal with it in dsl_dataset_destroy_sync().
|
||||
*/
|
||||
(void) dmu_free_object(os, obj);
|
||||
if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
|
||||
for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
|
||||
ds->ds_phys->ds_prev_snap_txg)) {
|
||||
/*
|
||||
* Ignore errors, if there is not enough disk space
|
||||
* we will deal with it in dsl_dataset_destroy_sync().
|
||||
*/
|
||||
(void) dmu_free_object(os, obj);
|
||||
}
|
||||
if (err != ESRCH)
|
||||
goto out;
|
||||
}
|
||||
if (err != ESRCH)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Only the ZIL knows how to free log blocks.
|
||||
@ -1245,7 +1244,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
|
||||
ASSERT(!dsl_dataset_is_snapshot(ds));
|
||||
|
||||
if (ds->ds_phys->ds_prev_snap_obj != 0)
|
||||
mrs_used = ds->ds_prev->ds_phys->ds_used_bytes;
|
||||
mrs_used = ds->ds_prev->ds_phys->ds_referenced_bytes;
|
||||
else
|
||||
mrs_used = 0;
|
||||
|
||||
@ -1253,7 +1252,7 @@ dsl_dataset_recalc_head_uniq(dsl_dataset_t *ds)
|
||||
|
||||
ASSERT3U(dlused, <=, mrs_used);
|
||||
ds->ds_phys->ds_unique_bytes =
|
||||
ds->ds_phys->ds_used_bytes - (mrs_used - dlused);
|
||||
ds->ds_phys->ds_referenced_bytes - (mrs_used - dlused);
|
||||
|
||||
if (spa_version(ds->ds_dir->dd_pool->dp_spa) >=
|
||||
SPA_VERSION_UNIQUE_ACCURATE)
|
||||
@ -1611,6 +1610,30 @@ process_old_deadlist(dsl_dataset_t *ds, dsl_dataset_t *ds_prev,
|
||||
ds_next->ds_phys->ds_deadlist_obj);
|
||||
}
|
||||
|
||||
static int
|
||||
old_synchronous_dataset_destroy(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
{
|
||||
int err;
|
||||
struct killarg ka;
|
||||
|
||||
/*
|
||||
* Free everything that we point to (that's born after
|
||||
* the previous snapshot, if we are a clone)
|
||||
*
|
||||
* NB: this should be very quick, because we already
|
||||
* freed all the objects in open context.
|
||||
*/
|
||||
ka.ds = ds;
|
||||
ka.tx = tx;
|
||||
err = traverse_dataset(ds,
|
||||
ds->ds_phys->ds_prev_snap_txg, TRAVERSE_POST,
|
||||
kill_blkptr, &ka);
|
||||
ASSERT3U(err, ==, 0);
|
||||
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) || ds->ds_phys->ds_unique_bytes == 0);
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
@ -1757,7 +1780,6 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
||||
tx);
|
||||
dsl_dir_diduse_space(tx->tx_pool->dp_free_dir,
|
||||
DD_USED_HEAD, used, comp, uncomp, tx);
|
||||
dsl_dir_dirty(tx->tx_pool->dp_free_dir, tx);
|
||||
|
||||
/* Merge our deadlist into next's and free it. */
|
||||
dsl_deadlist_merge(&ds_next->ds_deadlist,
|
||||
@ -1833,32 +1855,54 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
|
||||
}
|
||||
dsl_dataset_rele(ds_next, FTAG);
|
||||
} else {
|
||||
zfeature_info_t *async_destroy =
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
|
||||
|
||||
/*
|
||||
* There's no next snapshot, so this is a head dataset.
|
||||
* Destroy the deadlist. Unless it's a clone, the
|
||||
* deadlist should be empty. (If it's a clone, it's
|
||||
* safe to ignore the deadlist contents.)
|
||||
*/
|
||||
struct killarg ka;
|
||||
|
||||
dsl_deadlist_close(&ds->ds_deadlist);
|
||||
dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
|
||||
ds->ds_phys->ds_deadlist_obj = 0;
|
||||
|
||||
/*
|
||||
* Free everything that we point to (that's born after
|
||||
* the previous snapshot, if we are a clone)
|
||||
*
|
||||
* NB: this should be very quick, because we already
|
||||
* freed all the objects in open context.
|
||||
*/
|
||||
ka.ds = ds;
|
||||
ka.tx = tx;
|
||||
err = traverse_dataset(ds, ds->ds_phys->ds_prev_snap_txg,
|
||||
TRAVERSE_POST, kill_blkptr, &ka);
|
||||
ASSERT3U(err, ==, 0);
|
||||
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
|
||||
ds->ds_phys->ds_unique_bytes == 0);
|
||||
if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
|
||||
err = old_synchronous_dataset_destroy(ds, tx);
|
||||
} else {
|
||||
/*
|
||||
* Move the bptree into the pool's list of trees to
|
||||
* clean up and update space accounting information.
|
||||
*/
|
||||
uint64_t used, comp, uncomp;
|
||||
|
||||
ASSERT(err == 0 || err == EBUSY);
|
||||
if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
|
||||
spa_feature_incr(dp->dp_spa, async_destroy, tx);
|
||||
dp->dp_bptree_obj = bptree_alloc(
|
||||
dp->dp_meta_objset, tx);
|
||||
VERIFY(zap_add(dp->dp_meta_objset,
|
||||
DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
|
||||
&dp->dp_bptree_obj, tx) == 0);
|
||||
}
|
||||
|
||||
used = ds->ds_dir->dd_phys->dd_used_bytes;
|
||||
comp = ds->ds_dir->dd_phys->dd_compressed_bytes;
|
||||
uncomp = ds->ds_dir->dd_phys->dd_uncompressed_bytes;
|
||||
|
||||
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
|
||||
ds->ds_phys->ds_unique_bytes == used);
|
||||
|
||||
bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
|
||||
&ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
|
||||
used, comp, uncomp, tx);
|
||||
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
|
||||
-used, -comp, -uncomp, tx);
|
||||
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
|
||||
used, comp, uncomp, tx);
|
||||
}
|
||||
|
||||
if (ds->ds_prev != NULL) {
|
||||
if (spa_version(dp->dp_spa) >= SPA_VERSION_DIR_CLONES) {
|
||||
@ -2049,7 +2093,7 @@ dsl_dataset_snapshot_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
dsphys->ds_creation_time = gethrestime_sec();
|
||||
dsphys->ds_creation_txg = crtxg;
|
||||
dsphys->ds_deadlist_obj = ds->ds_phys->ds_deadlist_obj;
|
||||
dsphys->ds_used_bytes = ds->ds_phys->ds_used_bytes;
|
||||
dsphys->ds_referenced_bytes = ds->ds_phys->ds_referenced_bytes;
|
||||
dsphys->ds_compressed_bytes = ds->ds_phys->ds_compressed_bytes;
|
||||
dsphys->ds_uncompressed_bytes = ds->ds_phys->ds_uncompressed_bytes;
|
||||
dsphys->ds_flags = ds->ds_phys->ds_flags;
|
||||
@ -2142,10 +2186,71 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
|
||||
dmu_objset_sync(ds->ds_objset, zio, tx);
|
||||
}
|
||||
|
||||
static void
|
||||
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
|
||||
{
|
||||
uint64_t count = 0;
|
||||
objset_t *mos = ds->ds_dir->dd_pool->dp_meta_objset;
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
nvlist_t *propval;
|
||||
nvlist_t *val;
|
||||
|
||||
rw_enter(&ds->ds_dir->dd_pool->dp_config_rwlock, RW_READER);
|
||||
VERIFY(nvlist_alloc(&propval, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
VERIFY(nvlist_alloc(&val, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
|
||||
/*
|
||||
* There may me missing entries in ds_next_clones_obj
|
||||
* due to a bug in a previous version of the code.
|
||||
* Only trust it if it has the right number of entries.
|
||||
*/
|
||||
if (ds->ds_phys->ds_next_clones_obj != 0) {
|
||||
ASSERT3U(0, ==, zap_count(mos, ds->ds_phys->ds_next_clones_obj,
|
||||
&count));
|
||||
}
|
||||
if (count != ds->ds_phys->ds_num_children - 1) {
|
||||
goto fail;
|
||||
}
|
||||
for (zap_cursor_init(&zc, mos, ds->ds_phys->ds_next_clones_obj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
dsl_dataset_t *clone;
|
||||
char buf[ZFS_MAXNAMELEN];
|
||||
/*
|
||||
* Even though we hold the dp_config_rwlock, the dataset
|
||||
* may fail to open, returning ENOENT. If there is a
|
||||
* thread concurrently attempting to destroy this
|
||||
* dataset, it will have the ds_rwlock held for
|
||||
* RW_WRITER. Our call to dsl_dataset_hold_obj() ->
|
||||
* dsl_dataset_hold_ref() will fail its
|
||||
* rw_tryenter(&ds->ds_rwlock, RW_READER), drop the
|
||||
* dp_config_rwlock, and wait for the destroy progress
|
||||
* and signal ds_exclusive_cv. If the destroy was
|
||||
* successful, we will see that
|
||||
* DSL_DATASET_IS_DESTROYED(), and return ENOENT.
|
||||
*/
|
||||
if (dsl_dataset_hold_obj(ds->ds_dir->dd_pool,
|
||||
za.za_first_integer, FTAG, &clone) != 0)
|
||||
continue;
|
||||
dsl_dir_name(clone->ds_dir, buf);
|
||||
VERIFY(nvlist_add_boolean(val, buf) == 0);
|
||||
dsl_dataset_rele(clone, FTAG);
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
VERIFY(nvlist_add_nvlist(propval, ZPROP_VALUE, val) == 0);
|
||||
VERIFY(nvlist_add_nvlist(nv, zfs_prop_to_name(ZFS_PROP_CLONES),
|
||||
propval) == 0);
|
||||
fail:
|
||||
nvlist_free(val);
|
||||
nvlist_free(propval);
|
||||
rw_exit(&ds->ds_dir->dd_pool->dp_config_rwlock);
|
||||
}
|
||||
|
||||
void
|
||||
dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
|
||||
{
|
||||
uint64_t refd, avail, uobjs, aobjs;
|
||||
uint64_t refd, avail, uobjs, aobjs, ratio;
|
||||
|
||||
dsl_dir_stats(ds->ds_dir, nv);
|
||||
|
||||
@ -2172,6 +2277,31 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_DEFER_DESTROY,
|
||||
DS_IS_DEFER_DESTROY(ds) ? 1 : 0);
|
||||
|
||||
if (ds->ds_phys->ds_prev_snap_obj != 0) {
|
||||
uint64_t written, comp, uncomp;
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
dsl_dataset_t *prev;
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
int err = dsl_dataset_hold_obj(dp,
|
||||
ds->ds_phys->ds_prev_snap_obj, FTAG, &prev);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
if (err == 0) {
|
||||
err = dsl_dataset_space_written(prev, ds, &written,
|
||||
&comp, &uncomp);
|
||||
dsl_dataset_rele(prev, FTAG);
|
||||
if (err == 0) {
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_WRITTEN,
|
||||
written);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ratio = ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
|
||||
(ds->ds_phys->ds_uncompressed_bytes * 100 /
|
||||
ds->ds_phys->ds_compressed_bytes);
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_REFRATIO, ratio);
|
||||
|
||||
if (ds->ds_phys->ds_next_snap_obj) {
|
||||
/*
|
||||
* This is a snapshot; override the dd's space used with
|
||||
@ -2179,10 +2309,9 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv)
|
||||
*/
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_USED,
|
||||
ds->ds_phys->ds_unique_bytes);
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO,
|
||||
ds->ds_phys->ds_compressed_bytes == 0 ? 100 :
|
||||
(ds->ds_phys->ds_uncompressed_bytes * 100 /
|
||||
ds->ds_phys->ds_compressed_bytes));
|
||||
dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_COMPRESSRATIO, ratio);
|
||||
|
||||
get_clones_stat(ds, nv);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2226,7 +2355,7 @@ dsl_dataset_space(dsl_dataset_t *ds,
|
||||
uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp)
|
||||
{
|
||||
*refdbytesp = ds->ds_phys->ds_used_bytes;
|
||||
*refdbytesp = ds->ds_phys->ds_referenced_bytes;
|
||||
*availbytesp = dsl_dir_space_available(ds->ds_dir, NULL, 0, TRUE);
|
||||
if (ds->ds_reserved > ds->ds_phys->ds_unique_bytes)
|
||||
*availbytesp += ds->ds_reserved - ds->ds_phys->ds_unique_bytes;
|
||||
@ -2563,7 +2692,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
* Note however, if we stop before we reach the ORIGIN we get:
|
||||
* uN + kN + kN-1 + ... + kM - uM-1
|
||||
*/
|
||||
pa->used = origin_ds->ds_phys->ds_used_bytes;
|
||||
pa->used = origin_ds->ds_phys->ds_referenced_bytes;
|
||||
pa->comp = origin_ds->ds_phys->ds_compressed_bytes;
|
||||
pa->uncomp = origin_ds->ds_phys->ds_uncompressed_bytes;
|
||||
for (snap = list_head(&pa->shared_snaps); snap;
|
||||
@ -2597,7 +2726,7 @@ dsl_dataset_promote_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
* so we need to subtract out the clone origin's used space.
|
||||
*/
|
||||
if (pa->origin_origin) {
|
||||
pa->used -= pa->origin_origin->ds_phys->ds_used_bytes;
|
||||
pa->used -= pa->origin_origin->ds_phys->ds_referenced_bytes;
|
||||
pa->comp -= pa->origin_origin->ds_phys->ds_compressed_bytes;
|
||||
pa->uncomp -= pa->origin_origin->ds_phys->ds_uncompressed_bytes;
|
||||
}
|
||||
@ -3113,8 +3242,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
dsl_deadlist_space(&csa->ohds->ds_deadlist,
|
||||
&odl_used, &odl_comp, &odl_uncomp);
|
||||
|
||||
dused = csa->cds->ds_phys->ds_used_bytes + cdl_used -
|
||||
(csa->ohds->ds_phys->ds_used_bytes + odl_used);
|
||||
dused = csa->cds->ds_phys->ds_referenced_bytes + cdl_used -
|
||||
(csa->ohds->ds_phys->ds_referenced_bytes + odl_used);
|
||||
dcomp = csa->cds->ds_phys->ds_compressed_bytes + cdl_comp -
|
||||
(csa->ohds->ds_phys->ds_compressed_bytes + odl_comp);
|
||||
duncomp = csa->cds->ds_phys->ds_uncompressed_bytes +
|
||||
@ -3143,8 +3272,8 @@ dsl_dataset_clone_swap_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
/* swap ds_*_bytes */
|
||||
SWITCH64(csa->ohds->ds_phys->ds_used_bytes,
|
||||
csa->cds->ds_phys->ds_used_bytes);
|
||||
SWITCH64(csa->ohds->ds_phys->ds_referenced_bytes,
|
||||
csa->cds->ds_phys->ds_referenced_bytes);
|
||||
SWITCH64(csa->ohds->ds_phys->ds_compressed_bytes,
|
||||
csa->cds->ds_phys->ds_compressed_bytes);
|
||||
SWITCH64(csa->ohds->ds_phys->ds_uncompressed_bytes,
|
||||
@ -3273,8 +3402,9 @@ dsl_dataset_check_quota(dsl_dataset_t *ds, boolean_t check_quota,
|
||||
* on-disk is over quota and there are no pending changes (which
|
||||
* may free up space for us).
|
||||
*/
|
||||
if (ds->ds_phys->ds_used_bytes + inflight >= ds->ds_quota) {
|
||||
if (inflight > 0 || ds->ds_phys->ds_used_bytes < ds->ds_quota)
|
||||
if (ds->ds_phys->ds_referenced_bytes + inflight >= ds->ds_quota) {
|
||||
if (inflight > 0 ||
|
||||
ds->ds_phys->ds_referenced_bytes < ds->ds_quota)
|
||||
error = ERESTART;
|
||||
else
|
||||
error = EDQUOT;
|
||||
@ -3301,7 +3431,7 @@ dsl_dataset_set_quota_check(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
if (psa->psa_effective_value == 0)
|
||||
return (0);
|
||||
|
||||
if (psa->psa_effective_value < ds->ds_phys->ds_used_bytes ||
|
||||
if (psa->psa_effective_value < ds->ds_phys->ds_referenced_bytes ||
|
||||
psa->psa_effective_value < ds->ds_reserved)
|
||||
return (ENOSPC);
|
||||
|
||||
@ -4009,7 +4139,7 @@ dsl_dataset_get_holds(const char *dsname, nvlist_t **nvp)
|
||||
}
|
||||
|
||||
/*
|
||||
* Note, this fuction is used as the callback for dmu_objset_find(). We
|
||||
* Note, this function is used as the callback for dmu_objset_find(). We
|
||||
* always return 0 so that we will continue to find and process
|
||||
* inconsistent datasets, even if we encounter an error trying to
|
||||
* process one of them.
|
||||
@ -4028,3 +4158,156 @@ dsl_destroy_inconsistent(const char *dsname, void *arg)
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return (in *usedp) the amount of space written in new that is not
|
||||
* present in oldsnap. New may be a snapshot or the head. Old must be
|
||||
* a snapshot before new, in new's filesystem (or its origin). If not then
|
||||
* fail and return EINVAL.
|
||||
*
|
||||
* The written space is calculated by considering two components: First, we
|
||||
* ignore any freed space, and calculate the written as new's used space
|
||||
* minus old's used space. Next, we add in the amount of space that was freed
|
||||
* between the two snapshots, thus reducing new's used space relative to old's.
|
||||
* Specifically, this is the space that was born before old->ds_creation_txg,
|
||||
* and freed before new (ie. on new's deadlist or a previous deadlist).
|
||||
*
|
||||
* space freed [---------------------]
|
||||
* snapshots ---O-------O--------O-------O------
|
||||
* oldsnap new
|
||||
*/
|
||||
int
|
||||
dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
int err = 0;
|
||||
uint64_t snapobj;
|
||||
dsl_pool_t *dp = new->ds_dir->dd_pool;
|
||||
|
||||
*usedp = 0;
|
||||
*usedp += new->ds_phys->ds_referenced_bytes;
|
||||
*usedp -= oldsnap->ds_phys->ds_referenced_bytes;
|
||||
|
||||
*compp = 0;
|
||||
*compp += new->ds_phys->ds_compressed_bytes;
|
||||
*compp -= oldsnap->ds_phys->ds_compressed_bytes;
|
||||
|
||||
*uncompp = 0;
|
||||
*uncompp += new->ds_phys->ds_uncompressed_bytes;
|
||||
*uncompp -= oldsnap->ds_phys->ds_uncompressed_bytes;
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
snapobj = new->ds_object;
|
||||
while (snapobj != oldsnap->ds_object) {
|
||||
dsl_dataset_t *snap;
|
||||
uint64_t used, comp, uncomp;
|
||||
|
||||
if (snapobj == new->ds_object) {
|
||||
snap = new;
|
||||
} else {
|
||||
err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &snap);
|
||||
if (err != 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (snap->ds_phys->ds_prev_snap_txg ==
|
||||
oldsnap->ds_phys->ds_creation_txg) {
|
||||
/*
|
||||
* The blocks in the deadlist can not be born after
|
||||
* ds_prev_snap_txg, so get the whole deadlist space,
|
||||
* which is more efficient (especially for old-format
|
||||
* deadlists). Unfortunately the deadlist code
|
||||
* doesn't have enough information to make this
|
||||
* optimization itself.
|
||||
*/
|
||||
dsl_deadlist_space(&snap->ds_deadlist,
|
||||
&used, &comp, &uncomp);
|
||||
} else {
|
||||
dsl_deadlist_space_range(&snap->ds_deadlist,
|
||||
0, oldsnap->ds_phys->ds_creation_txg,
|
||||
&used, &comp, &uncomp);
|
||||
}
|
||||
*usedp += used;
|
||||
*compp += comp;
|
||||
*uncompp += uncomp;
|
||||
|
||||
/*
|
||||
* If we get to the beginning of the chain of snapshots
|
||||
* (ds_prev_snap_obj == 0) before oldsnap, then oldsnap
|
||||
* was not a snapshot of/before new.
|
||||
*/
|
||||
snapobj = snap->ds_phys->ds_prev_snap_obj;
|
||||
if (snap != new)
|
||||
dsl_dataset_rele(snap, FTAG);
|
||||
if (snapobj == 0) {
|
||||
err = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return (in *usedp) the amount of space that will be reclaimed if firstsnap,
|
||||
* lastsnap, and all snapshots in between are deleted.
|
||||
*
|
||||
* blocks that would be freed [---------------------------]
|
||||
* snapshots ---O-------O--------O-------O--------O
|
||||
* firstsnap lastsnap
|
||||
*
|
||||
* This is the set of blocks that were born after the snap before firstsnap,
|
||||
* (birth > firstsnap->prev_snap_txg) and died before the snap after the
|
||||
* last snap (ie, is on lastsnap->ds_next->ds_deadlist or an earlier deadlist).
|
||||
* We calculate this by iterating over the relevant deadlists (from the snap
|
||||
* after lastsnap, backward to the snap after firstsnap), summing up the
|
||||
* space on the deadlist that was born after the snap before firstsnap.
|
||||
*/
|
||||
int
|
||||
dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
|
||||
dsl_dataset_t *lastsnap,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
int err = 0;
|
||||
uint64_t snapobj;
|
||||
dsl_pool_t *dp = firstsnap->ds_dir->dd_pool;
|
||||
|
||||
ASSERT(dsl_dataset_is_snapshot(firstsnap));
|
||||
ASSERT(dsl_dataset_is_snapshot(lastsnap));
|
||||
|
||||
/*
|
||||
* Check that the snapshots are in the same dsl_dir, and firstsnap
|
||||
* is before lastsnap.
|
||||
*/
|
||||
if (firstsnap->ds_dir != lastsnap->ds_dir ||
|
||||
firstsnap->ds_phys->ds_creation_txg >
|
||||
lastsnap->ds_phys->ds_creation_txg)
|
||||
return (EINVAL);
|
||||
|
||||
*usedp = *compp = *uncompp = 0;
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
snapobj = lastsnap->ds_phys->ds_next_snap_obj;
|
||||
while (snapobj != firstsnap->ds_object) {
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t used, comp, uncomp;
|
||||
|
||||
err = dsl_dataset_hold_obj(dp, snapobj, FTAG, &ds);
|
||||
if (err != 0)
|
||||
break;
|
||||
|
||||
dsl_deadlist_space_range(&ds->ds_deadlist,
|
||||
firstsnap->ds_phys->ds_prev_snap_txg, UINT64_MAX,
|
||||
&used, &comp, &uncomp);
|
||||
*usedp += used;
|
||||
*compp += comp;
|
||||
*uncompp += uncomp;
|
||||
|
||||
snapobj = ds->ds_phys->ds_prev_snap_obj;
|
||||
ASSERT3U(snapobj, !=, 0);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
return (err);
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dsl_dataset.h>
|
||||
@ -29,6 +30,26 @@
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
|
||||
/*
|
||||
* Deadlist concurrency:
|
||||
*
|
||||
* Deadlists can only be modified from the syncing thread.
|
||||
*
|
||||
* Except for dsl_deadlist_insert(), it can only be modified with the
|
||||
* dp_config_rwlock held with RW_WRITER.
|
||||
*
|
||||
* The accessors (dsl_deadlist_space() and dsl_deadlist_space_range()) can
|
||||
* be called concurrently, from open context, with the dp_config_rwlock held
|
||||
* with RW_READER.
|
||||
*
|
||||
* Therefore, we only need to provide locking between dsl_deadlist_insert() and
|
||||
* the accessors, protecting:
|
||||
* dl_phys->dl_used,comp,uncomp
|
||||
* and protecting the dl_tree from being loaded.
|
||||
* The locking is provided by dl_lock. Note that the bpobj_t
|
||||
* provides its own locking, and dl_oldfmt is immutable.
|
||||
*/
|
||||
|
||||
static int
|
||||
dsl_deadlist_compare(const void *arg1, const void *arg2)
|
||||
{
|
||||
@ -309,14 +330,14 @@ dsl_deadlist_space(dsl_deadlist_t *dl,
|
||||
* return space used in the range (mintxg, maxtxg].
|
||||
* Includes maxtxg, does not include mintxg.
|
||||
* mintxg and maxtxg must both be keys in the deadlist (unless maxtxg is
|
||||
* UINT64_MAX).
|
||||
* larger than any bp in the deadlist (eg. UINT64_MAX)).
|
||||
*/
|
||||
void
|
||||
dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp)
|
||||
{
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
dsl_deadlist_entry_t *dle;
|
||||
dsl_deadlist_entry_t dle_tofind;
|
||||
avl_index_t where;
|
||||
|
||||
if (dl->dl_oldfmt) {
|
||||
@ -325,9 +346,10 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
|
||||
return;
|
||||
}
|
||||
|
||||
dsl_deadlist_load_tree(dl);
|
||||
*usedp = *compp = *uncompp = 0;
|
||||
|
||||
mutex_enter(&dl->dl_lock);
|
||||
dsl_deadlist_load_tree(dl);
|
||||
dle_tofind.dle_mintxg = mintxg;
|
||||
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
|
||||
/*
|
||||
@ -336,6 +358,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
|
||||
*/
|
||||
ASSERT(dle != NULL ||
|
||||
avl_nearest(&dl->dl_tree, where, AVL_AFTER) == NULL);
|
||||
|
||||
for (; dle && dle->dle_mintxg < maxtxg;
|
||||
dle = AVL_NEXT(&dl->dl_tree, dle)) {
|
||||
uint64_t used, comp, uncomp;
|
||||
@ -347,6 +370,7 @@ dsl_deadlist_space_range(dsl_deadlist_t *dl, uint64_t mintxg, uint64_t maxtxg,
|
||||
*compp += comp;
|
||||
*uncompp += uncomp;
|
||||
}
|
||||
mutex_exit(&dl->dl_lock);
|
||||
}
|
||||
|
||||
static void
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -170,10 +171,8 @@ dsl_deleg_set_sync(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
VERIFY(nvpair_value_nvlist(whopair, &perms) == 0);
|
||||
|
||||
if (zap_lookup(mos, zapobj, whokey, 8, 1, &jumpobj) != 0) {
|
||||
jumpobj = zap_create(mos, DMU_OT_DSL_PERMS,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
VERIFY(zap_update(mos, zapobj,
|
||||
whokey, 8, 1, &jumpobj, tx) == 0);
|
||||
jumpobj = zap_create_link(mos, DMU_OT_DSL_PERMS,
|
||||
zapobj, whokey, tx);
|
||||
}
|
||||
|
||||
while (permpair = nvlist_next_nvpair(perms, permpair)) {
|
||||
@ -525,10 +524,12 @@ dsl_load_user_sets(objset_t *mos, uint64_t zapobj, avl_tree_t *avl,
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if user has requested permission.
|
||||
* Check if user has requested permission. If descendent is set, must have
|
||||
* descendent perms.
|
||||
*/
|
||||
int
|
||||
dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
|
||||
dsl_deleg_access_impl(dsl_dataset_t *ds, boolean_t descendent, const char *perm,
|
||||
cred_t *cr)
|
||||
{
|
||||
dsl_dir_t *dd;
|
||||
dsl_pool_t *dp;
|
||||
@ -549,7 +550,7 @@ dsl_deleg_access_impl(dsl_dataset_t *ds, const char *perm, cred_t *cr)
|
||||
SPA_VERSION_DELEGATED_PERMS)
|
||||
return (EPERM);
|
||||
|
||||
if (dsl_dataset_is_snapshot(ds)) {
|
||||
if (dsl_dataset_is_snapshot(ds) || descendent) {
|
||||
/*
|
||||
* Snapshots are treated as descendents only,
|
||||
* local permissions do not apply.
|
||||
@ -642,7 +643,7 @@ dsl_deleg_access(const char *dsname, const char *perm, cred_t *cr)
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
error = dsl_deleg_access_impl(ds, perm, cr);
|
||||
error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
|
||||
return (error);
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dsl_pool.h>
|
||||
@ -39,6 +40,8 @@
|
||||
#include <sys/zfs_znode.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/dsl_deadlist.h>
|
||||
#include <sys/bptree.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
int zfs_no_write_throttle = 0;
|
||||
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */
|
||||
@ -99,20 +102,32 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
||||
}
|
||||
|
||||
int
|
||||
dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
{
|
||||
int err;
|
||||
dsl_pool_t *dp = dsl_pool_open_impl(spa, txg);
|
||||
|
||||
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
|
||||
&dp->dp_meta_objset);
|
||||
if (err != 0)
|
||||
dsl_pool_close(dp);
|
||||
else
|
||||
*dpp = dp;
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dsl_pool_open(dsl_pool_t *dp)
|
||||
{
|
||||
int err;
|
||||
dsl_dir_t *dd;
|
||||
dsl_dataset_t *ds;
|
||||
uint64_t obj;
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
|
||||
err = dmu_objset_open_impl(spa, NULL, &dp->dp_meta_rootbp,
|
||||
&dp->dp_meta_objset);
|
||||
if (err)
|
||||
goto out;
|
||||
ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset));
|
||||
|
||||
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
|
||||
&dp->dp_root_dir_obj);
|
||||
@ -128,7 +143,7 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (spa_version(spa) >= SPA_VERSION_ORIGIN) {
|
||||
if (spa_version(dp->dp_spa) >= SPA_VERSION_ORIGIN) {
|
||||
err = dsl_pool_open_special_dir(dp, ORIGIN_DIR_NAME, &dd);
|
||||
if (err)
|
||||
goto out;
|
||||
@ -145,7 +160,7 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (spa_version(spa) >= SPA_VERSION_DEADLISTS) {
|
||||
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
|
||||
err = dsl_pool_open_special_dir(dp, FREE_DIR_NAME,
|
||||
&dp->dp_free_dir);
|
||||
if (err)
|
||||
@ -159,6 +174,15 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
dp->dp_meta_objset, obj));
|
||||
}
|
||||
|
||||
if (spa_feature_is_active(dp->dp_spa,
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
|
||||
&dp->dp_bptree_obj);
|
||||
if (err != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
|
||||
&dp->dp_tmp_userrefs_obj);
|
||||
@ -167,15 +191,10 @@ dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp)
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = dsl_scan_init(dp, txg);
|
||||
err = dsl_scan_init(dp, dp->dp_tx.tx_open_txg);
|
||||
|
||||
out:
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
if (err)
|
||||
dsl_pool_close(dp);
|
||||
else
|
||||
*dpp = dp;
|
||||
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -291,7 +310,10 @@ static int
|
||||
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_deadlist_t *dl = arg;
|
||||
dsl_pool_t *dp = dmu_objset_pool(dl->dl_os);
|
||||
rw_enter(&dp->dp_config_rwlock, RW_READER);
|
||||
dsl_deadlist_insert(dl, bp, tx);
|
||||
rw_exit(&dp->dp_config_rwlock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
@ -466,7 +488,7 @@ int
|
||||
dsl_pool_sync_context(dsl_pool_t *dp)
|
||||
{
|
||||
return (curthread == dp->dp_tx.tx_sync_thread ||
|
||||
spa_get_dsl(dp->dp_spa) == NULL);
|
||||
spa_is_initializing(dp->dp_spa));
|
||||
}
|
||||
|
||||
uint64_t
|
||||
@ -784,11 +806,8 @@ dsl_pool_user_hold_create_obj(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
ASSERT(dp->dp_tmp_userrefs_obj == 0);
|
||||
ASSERT(dmu_tx_is_syncing(tx));
|
||||
|
||||
dp->dp_tmp_userrefs_obj = zap_create(mos, DMU_OT_USERREFS,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
|
||||
VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS,
|
||||
sizeof (uint64_t), 1, &dp->dp_tmp_userrefs_obj, tx) == 0);
|
||||
dp->dp_tmp_userrefs_obj = zap_create_link(mos, DMU_OT_USERREFS,
|
||||
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_TMP_USERREFS, tx);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/dsl_scan.h>
|
||||
@ -44,6 +45,7 @@
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/sa.h>
|
||||
#include <sys/sa_impl.h>
|
||||
#include <sys/zfeature.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/zfs_vfsops.h>
|
||||
#endif
|
||||
@ -382,55 +384,6 @@ dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
|
||||
priority, zio_flags, arc_flags, zb));
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
bookmark_is_zero(const zbookmark_t *zb)
|
||||
{
|
||||
return (zb->zb_objset == 0 && zb->zb_object == 0 &&
|
||||
zb->zb_level == 0 && zb->zb_blkid == 0);
|
||||
}
|
||||
|
||||
/* dnp is the dnode for zb1->zb_object */
|
||||
static boolean_t
|
||||
bookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
|
||||
const zbookmark_t *zb2)
|
||||
{
|
||||
uint64_t zb1nextL0, zb2thisobj;
|
||||
|
||||
ASSERT(zb1->zb_objset == zb2->zb_objset);
|
||||
ASSERT(zb2->zb_level == 0);
|
||||
|
||||
/*
|
||||
* A bookmark in the deadlist is considered to be after
|
||||
* everything else.
|
||||
*/
|
||||
if (zb2->zb_object == DMU_DEADLIST_OBJECT)
|
||||
return (B_TRUE);
|
||||
|
||||
/* The objset_phys_t isn't before anything. */
|
||||
if (dnp == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
zb1nextL0 = (zb1->zb_blkid + 1) <<
|
||||
((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
|
||||
|
||||
zb2thisobj = zb2->zb_object ? zb2->zb_object :
|
||||
zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
|
||||
|
||||
if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
|
||||
uint64_t nextobj = zb1nextL0 *
|
||||
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
|
||||
return (nextobj <= zb2thisobj);
|
||||
}
|
||||
|
||||
if (zb1->zb_object < zb2thisobj)
|
||||
return (B_TRUE);
|
||||
if (zb1->zb_object > zb2thisobj)
|
||||
return (B_FALSE);
|
||||
if (zb2->zb_object == DMU_META_DNODE_OBJECT)
|
||||
return (B_FALSE);
|
||||
return (zb1nextL0 <= zb2->zb_blkid);
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
dsl_scan_ds_maxtxg(dsl_dataset_t *ds)
|
||||
{
|
||||
@ -462,7 +415,7 @@ dsl_scan_check_pause(dsl_scan_t *scn, const zbookmark_t *zb)
|
||||
if (scn->scn_pausing)
|
||||
return (B_TRUE); /* we're already pausing */
|
||||
|
||||
if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark))
|
||||
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark))
|
||||
return (B_FALSE); /* we're resuming */
|
||||
|
||||
/* We only know how to resume from level-0 blocks. */
|
||||
@ -617,13 +570,13 @@ dsl_scan_check_resume(dsl_scan_t *scn, const dnode_phys_t *dnp,
|
||||
/*
|
||||
* We never skip over user/group accounting objects (obj<0)
|
||||
*/
|
||||
if (!bookmark_is_zero(&scn->scn_phys.scn_bookmark) &&
|
||||
if (!ZB_IS_ZERO(&scn->scn_phys.scn_bookmark) &&
|
||||
(int64_t)zb->zb_object >= 0) {
|
||||
/*
|
||||
* If we already visited this bp & everything below (in
|
||||
* a prior txg sync), don't bother doing it again.
|
||||
*/
|
||||
if (bookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
|
||||
if (zbookmark_is_before(dnp, zb, &scn->scn_phys.scn_bookmark))
|
||||
return (B_TRUE);
|
||||
|
||||
/*
|
||||
@ -816,22 +769,6 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_t *zb,
|
||||
if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg)
|
||||
return;
|
||||
|
||||
if (BP_GET_TYPE(bp) != DMU_OT_USERGROUP_USED) {
|
||||
/*
|
||||
* For non-user-accounting blocks, we need to read the
|
||||
* new bp (from a deleted snapshot, found in
|
||||
* check_existing_xlation). If we used the old bp,
|
||||
* pointers inside this block from before we resumed
|
||||
* would be untranslated.
|
||||
*
|
||||
* For user-accounting blocks, we need to read the old
|
||||
* bp, because we will apply the entire space delta to
|
||||
* it (original untranslated -> translations from
|
||||
* deleted snap -> now).
|
||||
*/
|
||||
bp_toread = *bp;
|
||||
}
|
||||
|
||||
if (dsl_scan_recurse(scn, ds, ostype, dnp, &bp_toread, zb, tx,
|
||||
&buf) != 0)
|
||||
return;
|
||||
@ -1396,19 +1333,28 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_scan_free_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
static boolean_t
|
||||
dsl_scan_free_should_pause(dsl_scan_t *scn)
|
||||
{
|
||||
dsl_scan_t *scn = arg;
|
||||
uint64_t elapsed_nanosecs;
|
||||
|
||||
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
|
||||
|
||||
if (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
|
||||
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
|
||||
(elapsed_nanosecs / MICROSEC > zfs_free_min_time_ms &&
|
||||
txg_sync_waiting(scn->scn_dp)) ||
|
||||
spa_shutting_down(scn->scn_dp->dp_spa))
|
||||
return (ERESTART);
|
||||
spa_shutting_down(scn->scn_dp->dp_spa));
|
||||
}
|
||||
|
||||
static int
|
||||
dsl_scan_free_block_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
{
|
||||
dsl_scan_t *scn = arg;
|
||||
|
||||
if (!scn->scn_is_bptree ||
|
||||
(BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_OBJSET)) {
|
||||
if (dsl_scan_free_should_pause(scn))
|
||||
return (ERESTART);
|
||||
}
|
||||
|
||||
zio_nowait(zio_free_sync(scn->scn_zio_root, scn->scn_dp->dp_spa,
|
||||
dmu_tx_get_txg(tx), bp, 0));
|
||||
@ -1433,6 +1379,10 @@ dsl_scan_active(dsl_scan_t *scn)
|
||||
if (scn->scn_phys.scn_state == DSS_SCANNING)
|
||||
return (B_TRUE);
|
||||
|
||||
if (spa_feature_is_active(spa,
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
|
||||
return (B_TRUE);
|
||||
}
|
||||
if (spa_version(scn->scn_dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
|
||||
(void) bpobj_space(&scn->scn_dp->dp_free_bpobj,
|
||||
&used, &comp, &uncomp);
|
||||
@ -1479,14 +1429,40 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
* traversing it.
|
||||
*/
|
||||
if (spa_version(dp->dp_spa) >= SPA_VERSION_DEADLISTS) {
|
||||
scn->scn_is_bptree = B_FALSE;
|
||||
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
|
||||
NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
err = bpobj_iterate(&dp->dp_free_bpobj,
|
||||
dsl_scan_free_cb, scn, tx);
|
||||
dsl_scan_free_block_cb, scn, tx);
|
||||
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
|
||||
|
||||
if (err == 0 && spa_feature_is_active(spa,
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
|
||||
scn->scn_is_bptree = B_TRUE;
|
||||
scn->scn_zio_root = zio_root(dp->dp_spa, NULL,
|
||||
NULL, ZIO_FLAG_MUSTSUCCEED);
|
||||
err = bptree_iterate(dp->dp_meta_objset,
|
||||
dp->dp_bptree_obj, B_TRUE, dsl_scan_free_block_cb,
|
||||
scn, tx);
|
||||
VERIFY3U(0, ==, zio_wait(scn->scn_zio_root));
|
||||
if (err != 0)
|
||||
return;
|
||||
|
||||
/* disable async destroy feature */
|
||||
spa_feature_decr(spa,
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY], tx);
|
||||
ASSERT(!spa_feature_is_active(spa,
|
||||
&spa_feature_table[SPA_FEATURE_ASYNC_DESTROY]));
|
||||
VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
|
||||
DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_BPTREE_OBJ, tx));
|
||||
VERIFY3U(0, ==, bptree_free(dp->dp_meta_objset,
|
||||
dp->dp_bptree_obj, tx));
|
||||
dp->dp_bptree_obj = 0;
|
||||
}
|
||||
if (scn->scn_visited_this_txg) {
|
||||
zfs_dbgmsg("freed %llu blocks in %llums from "
|
||||
"free_bpobj txg %llu",
|
||||
"free_bpobj/bptree txg %llu",
|
||||
(longlong_t)scn->scn_visited_this_txg,
|
||||
(longlong_t)
|
||||
(gethrtime() - scn->scn_sync_start_time) / MICROSEC,
|
||||
@ -1601,6 +1577,8 @@ count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
|
||||
for (i = 0; i < 4; i++) {
|
||||
int l = (i < 2) ? BP_GET_LEVEL(bp) : DN_MAX_LEVELS;
|
||||
int t = (i & 1) ? BP_GET_TYPE(bp) : DMU_OT_TOTAL;
|
||||
if (t & DMU_OT_NEWTYPE)
|
||||
t = DMU_OT_OTHER;
|
||||
zfs_blkstat_t *zb = &zab->zab_type[l][t];
|
||||
int equal;
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -30,9 +31,28 @@
|
||||
#include <sys/vdev_impl.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
/*
|
||||
* Allow allocations to switch to gang blocks quickly. We do this to
|
||||
* avoid having to load lots of space_maps in a given txg. There are,
|
||||
* however, some cases where we want to avoid "fast" ganging and instead
|
||||
* we want to do an exhaustive search of all metaslabs on this device.
|
||||
* Currently we don't allow any gang, zil, or dump device related allocations
|
||||
* to "fast" gang.
|
||||
*/
|
||||
#define CAN_FASTGANG(flags) \
|
||||
(!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
|
||||
METASLAB_GANG_AVOID)))
|
||||
|
||||
uint64_t metaslab_aliquot = 512ULL << 10;
|
||||
uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
|
||||
|
||||
/*
|
||||
* This value defines the number of allowed allocation failures per vdev.
|
||||
* If a device reaches this threshold in a given txg then we consider skipping
|
||||
* allocations on that device.
|
||||
*/
|
||||
int zfs_mg_alloc_failures;
|
||||
|
||||
/*
|
||||
* Metaslab debugging: when set, keeps all space maps in core to verify frees.
|
||||
*/
|
||||
@ -671,7 +691,7 @@ static space_map_ops_t metaslab_ndf_ops = {
|
||||
metaslab_ndf_fragmented
|
||||
};
|
||||
|
||||
space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops;
|
||||
space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops;
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
@ -844,7 +864,7 @@ metaslab_prefetch(metaslab_group_t *mg)
|
||||
}
|
||||
|
||||
static int
|
||||
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
|
||||
metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
|
||||
{
|
||||
metaslab_group_t *mg = msp->ms_group;
|
||||
space_map_t *sm = &msp->ms_map;
|
||||
@ -877,13 +897,6 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
|
||||
mutex_exit(&mg->mg_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we were able to load the map then make sure
|
||||
* that this map is still able to satisfy our request.
|
||||
*/
|
||||
if (msp->ms_weight < size)
|
||||
return (ENOSPC);
|
||||
|
||||
metaslab_group_sort(msp->ms_group, msp,
|
||||
msp->ms_weight | activation_weight);
|
||||
}
|
||||
@ -1099,6 +1112,7 @@ void
|
||||
metaslab_sync_reassess(metaslab_group_t *mg)
|
||||
{
|
||||
vdev_t *vd = mg->mg_vd;
|
||||
int64_t failures = mg->mg_alloc_failures;
|
||||
|
||||
/*
|
||||
* Re-evaluate all metaslabs which have lower offsets than the
|
||||
@ -1115,6 +1129,8 @@ metaslab_sync_reassess(metaslab_group_t *mg)
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
||||
atomic_add_64(&mg->mg_alloc_failures, -failures);
|
||||
|
||||
/*
|
||||
* Prefetch the next potential metaslabs
|
||||
*/
|
||||
@ -1139,9 +1155,10 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
||||
uint64_t min_distance, dva_t *dva, int d)
|
||||
metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
|
||||
uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
|
||||
{
|
||||
spa_t *spa = mg->mg_vd->vdev_spa;
|
||||
metaslab_t *msp = NULL;
|
||||
uint64_t offset = -1ULL;
|
||||
avl_tree_t *t = &mg->mg_metaslab_tree;
|
||||
@ -1162,11 +1179,17 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
||||
|
||||
mutex_enter(&mg->mg_lock);
|
||||
for (msp = avl_first(t); msp; msp = AVL_NEXT(t, msp)) {
|
||||
if (msp->ms_weight < size) {
|
||||
if (msp->ms_weight < asize) {
|
||||
spa_dbgmsg(spa, "%s: failed to meet weight "
|
||||
"requirement: vdev %llu, txg %llu, mg %p, "
|
||||
"msp %p, psize %llu, asize %llu, "
|
||||
"failures %llu, weight %llu",
|
||||
spa_name(spa), mg->mg_vd->vdev_id, txg,
|
||||
mg, msp, psize, asize,
|
||||
mg->mg_alloc_failures, msp->ms_weight);
|
||||
mutex_exit(&mg->mg_lock);
|
||||
return (-1ULL);
|
||||
}
|
||||
|
||||
was_active = msp->ms_weight & METASLAB_ACTIVE_MASK;
|
||||
if (activation_weight == METASLAB_WEIGHT_PRIMARY)
|
||||
break;
|
||||
@ -1185,6 +1208,25 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
||||
if (msp == NULL)
|
||||
return (-1ULL);
|
||||
|
||||
/*
|
||||
* If we've already reached the allowable number of failed
|
||||
* allocation attempts on this metaslab group then we
|
||||
* consider skipping it. We skip it only if we're allowed
|
||||
* to "fast" gang, the physical size is larger than
|
||||
* a gang block, and we're attempting to allocate from
|
||||
* the primary metaslab.
|
||||
*/
|
||||
if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
|
||||
CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
|
||||
activation_weight == METASLAB_WEIGHT_PRIMARY) {
|
||||
spa_dbgmsg(spa, "%s: skipping metaslab group: "
|
||||
"vdev %llu, txg %llu, mg %p, psize %llu, "
|
||||
"asize %llu, failures %llu", spa_name(spa),
|
||||
mg->mg_vd->vdev_id, txg, mg, psize, asize,
|
||||
mg->mg_alloc_failures);
|
||||
return (-1ULL);
|
||||
}
|
||||
|
||||
mutex_enter(&msp->ms_lock);
|
||||
|
||||
/*
|
||||
@ -1193,7 +1235,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
||||
* another thread may have changed the weight while we
|
||||
* were blocked on the metaslab lock.
|
||||
*/
|
||||
if (msp->ms_weight < size || (was_active &&
|
||||
if (msp->ms_weight < asize || (was_active &&
|
||||
!(msp->ms_weight & METASLAB_ACTIVE_MASK) &&
|
||||
activation_weight == METASLAB_WEIGHT_PRIMARY)) {
|
||||
mutex_exit(&msp->ms_lock);
|
||||
@ -1208,14 +1250,16 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
||||
continue;
|
||||
}
|
||||
|
||||
if (metaslab_activate(msp, activation_weight, size) != 0) {
|
||||
if (metaslab_activate(msp, activation_weight) != 0) {
|
||||
mutex_exit(&msp->ms_lock);
|
||||
continue;
|
||||
}
|
||||
|
||||
if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
|
||||
if ((offset = space_map_alloc(&msp->ms_map, asize)) != -1ULL)
|
||||
break;
|
||||
|
||||
atomic_inc_64(&mg->mg_alloc_failures);
|
||||
|
||||
metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
|
||||
|
||||
mutex_exit(&msp->ms_lock);
|
||||
@ -1224,7 +1268,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
|
||||
if (msp->ms_allocmap[txg & TXG_MASK].sm_space == 0)
|
||||
vdev_dirty(mg->mg_vd, VDD_METASLAB, msp, txg);
|
||||
|
||||
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, size);
|
||||
space_map_add(&msp->ms_allocmap[txg & TXG_MASK], offset, asize);
|
||||
|
||||
mutex_exit(&msp->ms_lock);
|
||||
|
||||
@ -1351,7 +1395,8 @@ top:
|
||||
asize = vdev_psize_to_asize(vd, psize);
|
||||
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
|
||||
|
||||
offset = metaslab_group_alloc(mg, asize, txg, distance, dva, d);
|
||||
offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
|
||||
dva, d, flags);
|
||||
if (offset != -1ULL) {
|
||||
/*
|
||||
* If we've just selected this metaslab group,
|
||||
@ -1363,18 +1408,24 @@ top:
|
||||
vdev_stat_t *vs = &vd->vdev_stat;
|
||||
int64_t vu, cu;
|
||||
|
||||
/*
|
||||
* Determine percent used in units of 0..1024.
|
||||
* (This is just to avoid floating point.)
|
||||
*/
|
||||
vu = (vs->vs_alloc << 10) / (vs->vs_space + 1);
|
||||
cu = (mc->mc_alloc << 10) / (mc->mc_space + 1);
|
||||
vu = (vs->vs_alloc * 100) / (vs->vs_space + 1);
|
||||
cu = (mc->mc_alloc * 100) / (mc->mc_space + 1);
|
||||
|
||||
/*
|
||||
* Bias by at most +/- 25% of the aliquot.
|
||||
* Calculate how much more or less we should
|
||||
* try to allocate from this device during
|
||||
* this iteration around the rotor.
|
||||
* For example, if a device is 80% full
|
||||
* and the pool is 20% full then we should
|
||||
* reduce allocations by 60% on this device.
|
||||
*
|
||||
* mg_bias = (20 - 80) * 512K / 100 = -307K
|
||||
*
|
||||
* This reduces allocations by 307K for this
|
||||
* iteration.
|
||||
*/
|
||||
mg->mg_bias = ((cu - vu) *
|
||||
(int64_t)mg->mg_aliquot) / (1024 * 4);
|
||||
(int64_t)mg->mg_aliquot) / 100;
|
||||
}
|
||||
|
||||
if (atomic_add_64_nv(&mc->mc_aliquot, asize) >=
|
||||
@ -1488,7 +1539,7 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
|
||||
mutex_enter(&msp->ms_lock);
|
||||
|
||||
if ((txg != 0 && spa_writeable(spa)) || !msp->ms_map.sm_loaded)
|
||||
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY, 0);
|
||||
error = metaslab_activate(msp, METASLAB_WEIGHT_SECONDARY);
|
||||
|
||||
if (error == 0 && !space_map_contains(&msp->ms_map, offset, size))
|
||||
error = ENOENT;
|
||||
|
@ -18,8 +18,11 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Portions Copyright 2011 iXsystems, Inc
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -426,10 +429,9 @@ sa_add_layout_entry(objset_t *os, sa_attr_type_t *attrs, int attr_count,
|
||||
char attr_name[8];
|
||||
|
||||
if (sa->sa_layout_attr_obj == 0) {
|
||||
sa->sa_layout_attr_obj = zap_create(os,
|
||||
DMU_OT_SA_ATTR_LAYOUTS, DMU_OT_NONE, 0, tx);
|
||||
VERIFY(zap_add(os, sa->sa_master_obj, SA_LAYOUTS, 8, 1,
|
||||
&sa->sa_layout_attr_obj, tx) == 0);
|
||||
sa->sa_layout_attr_obj = zap_create_link(os,
|
||||
DMU_OT_SA_ATTR_LAYOUTS,
|
||||
sa->sa_master_obj, SA_LAYOUTS, tx);
|
||||
}
|
||||
|
||||
(void) snprintf(attr_name, sizeof (attr_name),
|
||||
@ -605,14 +607,14 @@ sa_find_sizes(sa_os_t *sa, sa_bulk_attr_t *attr_desc, int attr_count,
|
||||
* and spill buffer.
|
||||
*/
|
||||
if (buftype == SA_BONUS && *index == -1 &&
|
||||
P2ROUNDUP(*total + hdrsize, 8) >
|
||||
*total + P2ROUNDUP(hdrsize, 8) >
|
||||
(full_space - sizeof (blkptr_t))) {
|
||||
*index = i;
|
||||
done = B_TRUE;
|
||||
}
|
||||
|
||||
next:
|
||||
if (P2ROUNDUP(*total + hdrsize, 8) > full_space &&
|
||||
if (*total + P2ROUNDUP(hdrsize, 8) > full_space &&
|
||||
buftype == SA_BONUS)
|
||||
*will_spill = B_TRUE;
|
||||
}
|
||||
@ -1551,10 +1553,9 @@ sa_attr_register_sync(sa_handle_t *hdl, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
if (sa->sa_reg_attr_obj == NULL) {
|
||||
sa->sa_reg_attr_obj = zap_create(hdl->sa_os,
|
||||
DMU_OT_SA_ATTR_REGISTRATION, DMU_OT_NONE, 0, tx);
|
||||
VERIFY(zap_add(hdl->sa_os, sa->sa_master_obj,
|
||||
SA_REGISTRY, 8, 1, &sa->sa_reg_attr_obj, tx) == 0);
|
||||
sa->sa_reg_attr_obj = zap_create_link(hdl->sa_os,
|
||||
DMU_OT_SA_ATTR_REGISTRATION,
|
||||
sa->sa_master_obj, SA_REGISTRY, tx);
|
||||
}
|
||||
for (i = 0; i != sa->sa_num_attrs; i++) {
|
||||
if (sa->sa_attr_table[i].sa_registered)
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -60,6 +62,7 @@
|
||||
#include <sys/spa_boot.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
#include <sys/dsl_scan.h>
|
||||
#include <sys/zfeature.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/bootprops.h>
|
||||
@ -111,6 +114,7 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
|
||||
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
|
||||
};
|
||||
|
||||
static dsl_syncfunc_t spa_sync_version;
|
||||
static dsl_syncfunc_t spa_sync_props;
|
||||
static boolean_t spa_has_active_shared_spare(spa_t *spa);
|
||||
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
|
||||
@ -165,15 +169,18 @@ spa_prop_add_list(nvlist_t *nvl, zpool_prop_t prop, char *strval,
|
||||
static void
|
||||
spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
|
||||
{
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
dsl_pool_t *pool = spa->spa_dsl_pool;
|
||||
uint64_t size;
|
||||
uint64_t alloc;
|
||||
uint64_t space;
|
||||
uint64_t cap, version;
|
||||
zprop_source_t src = ZPROP_SRC_NONE;
|
||||
spa_config_dirent_t *dp;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa->spa_props_lock));
|
||||
|
||||
if (spa->spa_root_vdev != NULL) {
|
||||
if (rvd != NULL) {
|
||||
alloc = metaslab_class_get_alloc(spa_normal_class(spa));
|
||||
size = metaslab_class_get_space(spa_normal_class(spa));
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_NAME, spa_name(spa), 0, src);
|
||||
@ -181,6 +188,15 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_ALLOCATED, NULL, alloc, src);
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_FREE, NULL,
|
||||
size - alloc, src);
|
||||
|
||||
space = 0;
|
||||
for (int c = 0; c < rvd->vdev_children; c++) {
|
||||
vdev_t *tvd = rvd->vdev_child[c];
|
||||
space += tvd->vdev_max_asize - tvd->vdev_asize;
|
||||
}
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_EXPANDSZ, NULL, space,
|
||||
src);
|
||||
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_READONLY, NULL,
|
||||
(spa_mode(spa) == FREAD), src);
|
||||
|
||||
@ -191,7 +207,7 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
|
||||
ddt_get_pool_dedup_ratio(spa), src);
|
||||
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_HEALTH, NULL,
|
||||
spa->spa_root_vdev->vdev_state, src);
|
||||
rvd->vdev_state, src);
|
||||
|
||||
version = spa_version(spa);
|
||||
if (version == zpool_prop_default_numeric(ZPOOL_PROP_VERSION))
|
||||
@ -201,8 +217,29 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_VERSION, NULL, version, src);
|
||||
}
|
||||
|
||||
if (pool != NULL) {
|
||||
dsl_dir_t *freedir = pool->dp_free_dir;
|
||||
|
||||
/*
|
||||
* The $FREE directory was introduced in SPA_VERSION_DEADLISTS;
|
||||
* when opening pools from before this version, freedir will be NULL.
|
||||
*/
|
||||
if (freedir != NULL) {
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING, NULL,
|
||||
freedir->dd_phys->dd_used_bytes, src);
|
||||
} else {
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_FREEING,
|
||||
NULL, 0, src);
|
||||
}
|
||||
}
|
||||
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_GUID, NULL, spa_guid(spa), src);
|
||||
|
||||
if (spa->spa_comment != NULL) {
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_COMMENT, spa->spa_comment,
|
||||
0, ZPROP_SRC_LOCAL);
|
||||
}
|
||||
|
||||
if (spa->spa_root != NULL)
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
|
||||
0, ZPROP_SRC_LOCAL);
|
||||
@ -335,25 +372,55 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
||||
nvpair_t *elem;
|
||||
int error = 0, reset_bootfs = 0;
|
||||
uint64_t objnum;
|
||||
boolean_t has_feature = B_FALSE;
|
||||
|
||||
elem = NULL;
|
||||
while ((elem = nvlist_next_nvpair(props, elem)) != NULL) {
|
||||
zpool_prop_t prop;
|
||||
char *propname, *strval;
|
||||
uint64_t intval;
|
||||
objset_t *os;
|
||||
char *slash;
|
||||
|
||||
propname = nvpair_name(elem);
|
||||
|
||||
if ((prop = zpool_name_to_prop(propname)) == ZPROP_INVAL)
|
||||
return (EINVAL);
|
||||
char *strval, *slash, *check, *fname;
|
||||
const char *propname = nvpair_name(elem);
|
||||
zpool_prop_t prop = zpool_name_to_prop(propname);
|
||||
|
||||
switch (prop) {
|
||||
case ZPROP_INVAL:
|
||||
if (!zpool_prop_feature(propname)) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Sanitize the input.
|
||||
*/
|
||||
if (nvpair_type(elem) != DATA_TYPE_UINT64) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (intval != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
fname = strchr(propname, '@') + 1;
|
||||
if (zfeature_lookup_name(fname, NULL) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
has_feature = B_TRUE;
|
||||
break;
|
||||
|
||||
case ZPOOL_PROP_VERSION:
|
||||
error = nvpair_value_uint64(elem, &intval);
|
||||
if (!error &&
|
||||
(intval < spa_version(spa) || intval > SPA_VERSION))
|
||||
(intval < spa_version(spa) ||
|
||||
intval > SPA_VERSION_BEFORE_FEATURES ||
|
||||
has_feature))
|
||||
error = EINVAL;
|
||||
break;
|
||||
|
||||
@ -390,6 +457,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
||||
error = nvpair_value_string(elem, &strval);
|
||||
|
||||
if (!error) {
|
||||
objset_t *os;
|
||||
uint64_t compress;
|
||||
|
||||
if (strval == NULL || strval[0] == '\0') {
|
||||
@ -462,6 +530,26 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
||||
error = EINVAL;
|
||||
break;
|
||||
|
||||
case ZPOOL_PROP_COMMENT:
|
||||
if ((error = nvpair_value_string(elem, &strval)) != 0)
|
||||
break;
|
||||
for (check = strval; *check != '\0'; check++) {
|
||||
/*
|
||||
* The kernel doesn't have an easy isprint()
|
||||
* check. For this kernel check, we merely
|
||||
* check ASCII apart from DEL. Fix this if
|
||||
* there is an easy-to-use kernel isprint().
|
||||
*/
|
||||
if (*check >= 0x7f) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
check++;
|
||||
}
|
||||
if (strlen(strval) > ZPROP_MAX_COMMENT)
|
||||
error = E2BIG;
|
||||
break;
|
||||
|
||||
case ZPOOL_PROP_DEDUPDITTO:
|
||||
if (spa_version(spa) < SPA_VERSION_DEDUP)
|
||||
error = ENOTSUP;
|
||||
@ -519,33 +607,58 @@ int
|
||||
spa_prop_set(spa_t *spa, nvlist_t *nvp)
|
||||
{
|
||||
int error;
|
||||
nvpair_t *elem;
|
||||
nvpair_t *elem = NULL;
|
||||
boolean_t need_sync = B_FALSE;
|
||||
zpool_prop_t prop;
|
||||
|
||||
if ((error = spa_prop_validate(spa, nvp)) != 0)
|
||||
return (error);
|
||||
|
||||
elem = NULL;
|
||||
while ((elem = nvlist_next_nvpair(nvp, elem)) != NULL) {
|
||||
if ((prop = zpool_name_to_prop(
|
||||
nvpair_name(elem))) == ZPROP_INVAL)
|
||||
return (EINVAL);
|
||||
zpool_prop_t prop = zpool_name_to_prop(nvpair_name(elem));
|
||||
|
||||
if (prop == ZPOOL_PROP_CACHEFILE ||
|
||||
prop == ZPOOL_PROP_ALTROOT ||
|
||||
prop == ZPOOL_PROP_READONLY)
|
||||
continue;
|
||||
|
||||
if (prop == ZPOOL_PROP_VERSION || prop == ZPROP_INVAL) {
|
||||
uint64_t ver;
|
||||
|
||||
if (prop == ZPOOL_PROP_VERSION) {
|
||||
VERIFY(nvpair_value_uint64(elem, &ver) == 0);
|
||||
} else {
|
||||
ASSERT(zpool_prop_feature(nvpair_name(elem)));
|
||||
ver = SPA_VERSION_FEATURES;
|
||||
need_sync = B_TRUE;
|
||||
}
|
||||
|
||||
/* Save time if the version is already set. */
|
||||
if (ver == spa_version(spa))
|
||||
continue;
|
||||
|
||||
/*
|
||||
* In addition to the pool directory object, we might
|
||||
* create the pool properties object, the features for
|
||||
* read object, the features for write object, or the
|
||||
* feature descriptions object.
|
||||
*/
|
||||
error = dsl_sync_task_do(spa_get_dsl(spa), NULL,
|
||||
spa_sync_version, spa, &ver, 6);
|
||||
if (error)
|
||||
return (error);
|
||||
continue;
|
||||
}
|
||||
|
||||
need_sync = B_TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
if (need_sync)
|
||||
if (need_sync) {
|
||||
return (dsl_sync_task_do(spa_get_dsl(spa), NULL, spa_sync_props,
|
||||
spa, nvp, 3));
|
||||
else
|
||||
return (0);
|
||||
spa, nvp, 6));
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -562,6 +675,43 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Change the GUID for the pool. This is done so that we can later
|
||||
* re-import a pool built from a clone of our own vdevs. We will modify
|
||||
* the root vdev's guid, our own pool guid, and then mark all of our
|
||||
* vdevs dirty. Note that we must make sure that all our vdevs are
|
||||
* online when we do this, or else any vdevs that weren't present
|
||||
* would be orphaned from our pool. We are also going to issue a
|
||||
* sysevent to update any watchers.
|
||||
*/
|
||||
int
|
||||
spa_change_guid(spa_t *spa)
|
||||
{
|
||||
uint64_t oldguid, newguid;
|
||||
uint64_t txg;
|
||||
|
||||
if (!(spa_mode_global & FWRITE))
|
||||
return (EROFS);
|
||||
|
||||
txg = spa_vdev_enter(spa);
|
||||
|
||||
if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY)
|
||||
return (spa_vdev_exit(spa, NULL, txg, ENXIO));
|
||||
|
||||
oldguid = spa_guid(spa);
|
||||
newguid = spa_generate_guid(NULL);
|
||||
ASSERT3U(oldguid, !=, newguid);
|
||||
|
||||
spa->spa_root_vdev->vdev_guid = newguid;
|
||||
spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid);
|
||||
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
|
||||
spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID);
|
||||
|
||||
return (spa_vdev_exit(spa, NULL, txg, 0));
|
||||
}
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* SPA state manipulation (open/create/destroy/import/export)
|
||||
@ -610,7 +760,7 @@ static taskq_t *
|
||||
spa_taskq_create(spa_t *spa, const char *name, enum zti_modes mode,
|
||||
uint_t value)
|
||||
{
|
||||
uint_t flags = TASKQ_PREPOPULATE;
|
||||
uint_t flags = 0;
|
||||
boolean_t batch = B_FALSE;
|
||||
|
||||
switch (mode) {
|
||||
@ -988,8 +1138,10 @@ spa_unload(spa_t *spa)
|
||||
}
|
||||
spa->spa_spares.sav_count = 0;
|
||||
|
||||
for (i = 0; i < spa->spa_l2cache.sav_count; i++)
|
||||
for (i = 0; i < spa->spa_l2cache.sav_count; i++) {
|
||||
vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
|
||||
vdev_free(spa->spa_l2cache.sav_vdevs[i]);
|
||||
}
|
||||
if (spa->spa_l2cache.sav_vdevs) {
|
||||
kmem_free(spa->spa_l2cache.sav_vdevs,
|
||||
spa->spa_l2cache.sav_count * sizeof (void *));
|
||||
@ -1003,6 +1155,11 @@ spa_unload(spa_t *spa)
|
||||
|
||||
spa->spa_async_suspended = 0;
|
||||
|
||||
if (spa->spa_comment != NULL) {
|
||||
spa_strfree(spa->spa_comment);
|
||||
spa->spa_comment = NULL;
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
}
|
||||
|
||||
@ -1212,11 +1369,13 @@ spa_load_l2cache(spa_t *spa)
|
||||
|
||||
vd = oldvdevs[i];
|
||||
if (vd != NULL) {
|
||||
ASSERT(vd->vdev_isl2cache);
|
||||
|
||||
if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
|
||||
pool != 0ULL && l2arc_vdev_present(vd))
|
||||
l2arc_remove_vdev(vd);
|
||||
(void) vdev_close(vd);
|
||||
spa_l2cache_remove(vd);
|
||||
vdev_clear_stats(vd);
|
||||
vdev_free(vd);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1523,7 +1682,7 @@ spa_load_verify_done(zio_t *zio)
|
||||
int error = zio->io_error;
|
||||
|
||||
if (error) {
|
||||
if ((BP_GET_LEVEL(bp) != 0 || dmu_ot[type].ot_metadata) &&
|
||||
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
|
||||
type != DMU_OT_INTENT_LOG)
|
||||
atomic_add_64(&sle->sle_meta_count, 1);
|
||||
else
|
||||
@ -1718,6 +1877,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
|
||||
{
|
||||
nvlist_t *config = spa->spa_config;
|
||||
char *ereport = FM_EREPORT_ZFS_POOL;
|
||||
char *comment;
|
||||
int error;
|
||||
uint64_t pool_guid;
|
||||
nvlist_t *nvl;
|
||||
@ -1725,6 +1885,10 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
|
||||
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &pool_guid))
|
||||
return (EINVAL);
|
||||
|
||||
ASSERT(spa->spa_comment == NULL);
|
||||
if (nvlist_lookup_string(config, ZPOOL_CONFIG_COMMENT, &comment) == 0)
|
||||
spa->spa_comment = spa_strdup(comment);
|
||||
|
||||
/*
|
||||
* Versioning wasn't explicitly added to the label until later, so if
|
||||
* it's not present treat it as the initial version.
|
||||
@ -1740,7 +1904,7 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
|
||||
spa_guid_exists(pool_guid, 0)) {
|
||||
error = EEXIST;
|
||||
} else {
|
||||
spa->spa_load_guid = pool_guid;
|
||||
spa->spa_config_guid = pool_guid;
|
||||
|
||||
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_SPLIT,
|
||||
&nvl) == 0) {
|
||||
@ -1748,6 +1912,9 @@ spa_load(spa_t *spa, spa_load_state_t state, spa_import_type_t type,
|
||||
KM_SLEEP) == 0);
|
||||
}
|
||||
|
||||
nvlist_free(spa->spa_load_info);
|
||||
spa->spa_load_info = fnvlist_alloc();
|
||||
|
||||
gethrestime(&spa->spa_loaded_ts);
|
||||
error = spa_load_impl(spa, pool_guid, config, state, type,
|
||||
mosconfig, &ereport);
|
||||
@ -1780,12 +1947,14 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
{
|
||||
int error = 0;
|
||||
nvlist_t *nvroot = NULL;
|
||||
nvlist_t *label;
|
||||
vdev_t *rvd;
|
||||
uberblock_t *ub = &spa->spa_uberblock;
|
||||
uint64_t children, config_cache_txg = spa->spa_config_txg;
|
||||
int orig_mode = spa->spa_mode;
|
||||
int parse;
|
||||
uint64_t obj;
|
||||
boolean_t missing_feat_write = B_FALSE;
|
||||
|
||||
/*
|
||||
* If this is an untrusted config, access the pool in read-only mode.
|
||||
@ -1852,7 +2021,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
*/
|
||||
if (type != SPA_IMPORT_ASSEMBLE) {
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
error = vdev_validate(rvd);
|
||||
error = vdev_validate(rvd, mosconfig);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
|
||||
if (error != 0)
|
||||
@ -1865,19 +2034,78 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
/*
|
||||
* Find the best uberblock.
|
||||
*/
|
||||
vdev_uberblock_load(NULL, rvd, ub);
|
||||
vdev_uberblock_load(rvd, ub, &label);
|
||||
|
||||
/*
|
||||
* If we weren't able to find a single valid uberblock, return failure.
|
||||
*/
|
||||
if (ub->ub_txg == 0)
|
||||
if (ub->ub_txg == 0) {
|
||||
nvlist_free(label);
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, ENXIO));
|
||||
}
|
||||
|
||||
/*
|
||||
* If the pool is newer than the code, we can't open it.
|
||||
* If the pool has an unsupported version we can't open it.
|
||||
*/
|
||||
if (ub->ub_version > SPA_VERSION)
|
||||
if (!SPA_VERSION_IS_SUPPORTED(ub->ub_version)) {
|
||||
nvlist_free(label);
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_VERSION_NEWER, ENOTSUP));
|
||||
}
|
||||
|
||||
if (ub->ub_version >= SPA_VERSION_FEATURES) {
|
||||
nvlist_t *features;
|
||||
|
||||
/*
|
||||
* If we weren't able to find what's necessary for reading the
|
||||
* MOS in the label, return failure.
|
||||
*/
|
||||
if (label == NULL || nvlist_lookup_nvlist(label,
|
||||
ZPOOL_CONFIG_FEATURES_FOR_READ, &features) != 0) {
|
||||
nvlist_free(label);
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA,
|
||||
ENXIO));
|
||||
}
|
||||
|
||||
/*
|
||||
* Update our in-core representation with the definitive values
|
||||
* from the label.
|
||||
*/
|
||||
nvlist_free(spa->spa_label_features);
|
||||
VERIFY(nvlist_dup(features, &spa->spa_label_features, 0) == 0);
|
||||
}
|
||||
|
||||
nvlist_free(label);
|
||||
|
||||
/*
|
||||
* Look through entries in the label nvlist's features_for_read. If
|
||||
* there is a feature listed there which we don't understand then we
|
||||
* cannot open the pool.
|
||||
*/
|
||||
if (ub->ub_version >= SPA_VERSION_FEATURES) {
|
||||
nvlist_t *unsup_feat;
|
||||
|
||||
VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
|
||||
0);
|
||||
|
||||
for (nvpair_t *nvp = nvlist_next_nvpair(spa->spa_label_features,
|
||||
NULL); nvp != NULL;
|
||||
nvp = nvlist_next_nvpair(spa->spa_label_features, nvp)) {
|
||||
if (!zfeature_is_supported(nvpair_name(nvp))) {
|
||||
VERIFY(nvlist_add_string(unsup_feat,
|
||||
nvpair_name(nvp), "") == 0);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nvlist_empty(unsup_feat)) {
|
||||
VERIFY(nvlist_add_nvlist(spa->spa_load_info,
|
||||
ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
|
||||
nvlist_free(unsup_feat);
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
|
||||
ENOTSUP));
|
||||
}
|
||||
|
||||
nvlist_free(unsup_feat);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the vdev guid sum doesn't match the uberblock, we have an
|
||||
@ -1911,7 +2139,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
spa->spa_claim_max_txg = spa->spa_first_txg;
|
||||
spa->spa_prev_software_version = ub->ub_software_version;
|
||||
|
||||
error = dsl_pool_open(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
|
||||
error = dsl_pool_init(spa, spa->spa_first_txg, &spa->spa_dsl_pool);
|
||||
if (error)
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
spa->spa_meta_objset = spa->spa_dsl_pool->dp_meta_objset;
|
||||
@ -1919,6 +2147,84 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
if (spa_dir_prop(spa, DMU_POOL_CONFIG, &spa->spa_config_object) != 0)
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
|
||||
if (spa_version(spa) >= SPA_VERSION_FEATURES) {
|
||||
boolean_t missing_feat_read = B_FALSE;
|
||||
nvlist_t *unsup_feat;
|
||||
|
||||
if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_READ,
|
||||
&spa->spa_feat_for_read_obj) != 0) {
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
|
||||
if (spa_dir_prop(spa, DMU_POOL_FEATURES_FOR_WRITE,
|
||||
&spa->spa_feat_for_write_obj) != 0) {
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
|
||||
if (spa_dir_prop(spa, DMU_POOL_FEATURE_DESCRIPTIONS,
|
||||
&spa->spa_feat_desc_obj) != 0) {
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
}
|
||||
|
||||
VERIFY(nvlist_alloc(&unsup_feat, NV_UNIQUE_NAME, KM_SLEEP) ==
|
||||
0);
|
||||
|
||||
if (!feature_is_supported(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj, spa->spa_feat_desc_obj,
|
||||
unsup_feat))
|
||||
missing_feat_read = B_TRUE;
|
||||
|
||||
if (spa_writeable(spa) || state == SPA_LOAD_TRYIMPORT) {
|
||||
if (!feature_is_supported(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_write_obj, spa->spa_feat_desc_obj,
|
||||
unsup_feat))
|
||||
missing_feat_write = B_TRUE;
|
||||
}
|
||||
|
||||
if (!nvlist_empty(unsup_feat)) {
|
||||
VERIFY(nvlist_add_nvlist(spa->spa_load_info,
|
||||
ZPOOL_CONFIG_UNSUP_FEAT, unsup_feat) == 0);
|
||||
}
|
||||
|
||||
nvlist_free(unsup_feat);
|
||||
|
||||
if (!missing_feat_read) {
|
||||
fnvlist_add_boolean(spa->spa_load_info,
|
||||
ZPOOL_CONFIG_CAN_RDONLY);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the state is SPA_LOAD_TRYIMPORT, our objective is
|
||||
* twofold: to determine whether the pool is available for
|
||||
* import in read-write mode and (if it is not) whether the
|
||||
* pool is available for import in read-only mode. If the pool
|
||||
* is available for import in read-write mode, it is displayed
|
||||
* as available in userland; if it is not available for import
|
||||
* in read-only mode, it is displayed as unavailable in
|
||||
* userland. If the pool is available for import in read-only
|
||||
* mode but not read-write mode, it is displayed as unavailable
|
||||
* in userland with a special note that the pool is actually
|
||||
* available for open in read-only mode.
|
||||
*
|
||||
* As a result, if the state is SPA_LOAD_TRYIMPORT and we are
|
||||
* missing a feature for write, we must first determine whether
|
||||
* the pool can be opened read-only before returning to
|
||||
* userland in order to know whether to display the
|
||||
* above-mentioned note.
|
||||
*/
|
||||
if (missing_feat_read || (missing_feat_write &&
|
||||
spa_writeable(spa))) {
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT,
|
||||
ENOTSUP));
|
||||
}
|
||||
}
|
||||
|
||||
spa->spa_is_initializing = B_TRUE;
|
||||
error = dsl_pool_open(spa->spa_dsl_pool);
|
||||
spa->spa_is_initializing = B_FALSE;
|
||||
if (error != 0)
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_CORRUPT_DATA, EIO));
|
||||
|
||||
if (!mosconfig) {
|
||||
uint64_t hostid;
|
||||
nvlist_t *policy = NULL, *nvconfig;
|
||||
@ -1949,7 +2255,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
cmn_err(CE_WARN, "pool '%s' could not be "
|
||||
"loaded as it was last accessed by "
|
||||
"another system (host: %s hostid: 0x%lx). "
|
||||
"See: http://www.sun.com/msg/ZFS-8000-EY",
|
||||
"See: http://illumos.org/msg/ZFS-8000-EY",
|
||||
spa_name(spa), hostname,
|
||||
(unsigned long)hostid);
|
||||
return (EBADF);
|
||||
@ -2136,7 +2442,7 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
nvlist_free(nvconfig);
|
||||
|
||||
/*
|
||||
* Now that we've validate the config, check the state of the
|
||||
* Now that we've validated the config, check the state of the
|
||||
* root vdev. If it can't be opened, it indicates one or
|
||||
* more toplevel vdevs are faulted.
|
||||
*/
|
||||
@ -2149,6 +2455,17 @@ spa_load_impl(spa_t *spa, uint64_t pool_guid, nvlist_t *config,
|
||||
}
|
||||
}
|
||||
|
||||
if (missing_feat_write) {
|
||||
ASSERT(state == SPA_LOAD_TRYIMPORT);
|
||||
|
||||
/*
|
||||
* At this point, we know that we can open the pool in
|
||||
* read-only mode but not read-write mode. We now have enough
|
||||
* information and can return to userland.
|
||||
*/
|
||||
return (spa_vdev_err(rvd, VDEV_AUX_UNSUP_FEAT, ENOTSUP));
|
||||
}
|
||||
|
||||
/*
|
||||
* We've successfully opened the pool, verify that we're ready
|
||||
* to start pushing transactions.
|
||||
@ -2258,10 +2575,18 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
|
||||
return (spa_load(spa, state, SPA_IMPORT_EXISTING, mosconfig));
|
||||
}
|
||||
|
||||
/*
|
||||
* If spa_load() fails this function will try loading prior txg's. If
|
||||
* 'state' is SPA_LOAD_RECOVER and one of these loads succeeds the pool
|
||||
* will be rewound to that txg. If 'state' is not SPA_LOAD_RECOVER this
|
||||
* function will not rewind the pool and will return the same error as
|
||||
* spa_load().
|
||||
*/
|
||||
static int
|
||||
spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
|
||||
uint64_t max_request, int rewind_flags)
|
||||
{
|
||||
nvlist_t *loadinfo = NULL;
|
||||
nvlist_t *config = NULL;
|
||||
int load_error, rewind_error;
|
||||
uint64_t safe_rewind_txg;
|
||||
@ -2290,9 +2615,18 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
|
||||
return (load_error);
|
||||
}
|
||||
|
||||
/* Price of rolling back is discarding txgs, including log */
|
||||
if (state == SPA_LOAD_RECOVER)
|
||||
if (state == SPA_LOAD_RECOVER) {
|
||||
/* Price of rolling back is discarding txgs, including log */
|
||||
spa_set_log_state(spa, SPA_LOG_CLEAR);
|
||||
} else {
|
||||
/*
|
||||
* If we aren't rolling back save the load info from our first
|
||||
* import attempt so that we can restore it after attempting
|
||||
* to rewind.
|
||||
*/
|
||||
loadinfo = spa->spa_load_info;
|
||||
spa->spa_load_info = fnvlist_alloc();
|
||||
}
|
||||
|
||||
spa->spa_load_max_txg = spa->spa_last_ubsync_txg;
|
||||
safe_rewind_txg = spa->spa_last_ubsync_txg - TXG_DEFER_SIZE;
|
||||
@ -2316,7 +2650,20 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
|
||||
if (config && (rewind_error || state != SPA_LOAD_RECOVER))
|
||||
spa_config_set(spa, config);
|
||||
|
||||
return (state == SPA_LOAD_RECOVER ? rewind_error : load_error);
|
||||
if (state == SPA_LOAD_RECOVER) {
|
||||
ASSERT3P(loadinfo, ==, NULL);
|
||||
return (rewind_error);
|
||||
} else {
|
||||
/* Store the rewind info as part of the initial load info */
|
||||
fnvlist_add_nvlist(loadinfo, ZPOOL_CONFIG_REWIND_INFO,
|
||||
spa->spa_load_info);
|
||||
|
||||
/* Restore the initial load info */
|
||||
fnvlist_free(spa->spa_load_info);
|
||||
spa->spa_load_info = loadinfo;
|
||||
|
||||
return (load_error);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2586,8 +2933,50 @@ spa_add_l2cache(spa_t *spa, nvlist_t *config)
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Add a ZPOOL_CONFIG_FEATURE_STATS nvlist to 'config'.
 *
 * Every entry found in the pool's features-for-read and features-for-write
 * ZAP objects (both looked up in spa_meta_objset) is added to a temporary
 * nvlist as a (za_name, za_first_integer) uint64 pair; that nvlist is then
 * added to 'config' and the temporary is freed.  A ZAP object whose object
 * number is 0 is skipped.
 *
 * The caller is expected to hold SCL_CONFIG as reader (asserted below).
 */
static void
|
||||
spa_add_feature_stats(spa_t *spa, nvlist_t *config)
|
||||
{
|
||||
nvlist_t *features;
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_CONFIG, RW_READER));
|
||||
VERIFY(nvlist_alloc(&features, NV_UNIQUE_NAME, KM_SLEEP) == 0);
|
||||
|
||||
/* Walk the features-for-read ZAP object, if the pool has one. */
if (spa->spa_feat_for_read_obj != 0) {
|
||||
for (zap_cursor_init(&zc, spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
/* Each entry is expected to hold exactly one uint64_t. */
ASSERT(za.za_integer_length == sizeof (uint64_t) &&
|
||||
za.za_num_integers == 1);
|
||||
VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
|
||||
za.za_first_integer));
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
/* Same walk for the features-for-write ZAP object. */
if (spa->spa_feat_for_write_obj != 0) {
|
||||
for (zap_cursor_init(&zc, spa->spa_meta_objset,
|
||||
spa->spa_feat_for_write_obj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
ASSERT(za.za_integer_length == sizeof (uint64_t) &&
|
||||
za.za_num_integers == 1);
|
||||
VERIFY3U(0, ==, nvlist_add_uint64(features, za.za_name,
|
||||
za.za_first_integer));
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
}
|
||||
|
||||
/* Publish the collected list in 'config', then drop the local list. */
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURE_STATS,
|
||||
features) == 0);
|
||||
nvlist_free(features);
|
||||
}
|
||||
|
||||
int
|
||||
spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
|
||||
spa_get_stats(const char *name, nvlist_t **config,
|
||||
char *altroot, size_t buflen)
|
||||
{
|
||||
int error;
|
||||
spa_t *spa;
|
||||
@ -2622,6 +3011,7 @@ spa_get_stats(const char *name, nvlist_t **config, char *altroot, size_t buflen)
|
||||
|
||||
spa_add_spares(spa, *config);
|
||||
spa_add_l2cache(spa, *config);
|
||||
spa_add_feature_stats(spa, *config);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2712,6 +3102,7 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
|
||||
if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
|
||||
strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
|
||||
error = ENOTBLK;
|
||||
vdev_free(vd);
|
||||
goto out;
|
||||
}
|
||||
#endif
|
||||
@ -2821,10 +3212,6 @@ spa_l2cache_drop(spa_t *spa)
|
||||
if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
|
||||
pool != 0ULL && l2arc_vdev_present(vd))
|
||||
l2arc_remove_vdev(vd);
|
||||
if (vd->vdev_isl2cache)
|
||||
spa_l2cache_remove(vd);
|
||||
vdev_clear_stats(vd);
|
||||
(void) vdev_close(vd);
|
||||
}
|
||||
}
|
||||
|
||||
@ -2845,6 +3232,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
nvlist_t **spares, **l2cache;
|
||||
uint_t nspares, nl2cache;
|
||||
uint64_t version, obj;
|
||||
boolean_t has_features;
|
||||
|
||||
/*
|
||||
* If this pool already exists, return failure.
|
||||
@ -2870,10 +3258,18 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(props, zpool_prop_to_name(ZPOOL_PROP_VERSION),
|
||||
&version) != 0)
|
||||
has_features = B_FALSE;
|
||||
for (nvpair_t *elem = nvlist_next_nvpair(props, NULL);
|
||||
elem != NULL; elem = nvlist_next_nvpair(props, elem)) {
|
||||
if (zpool_prop_feature(nvpair_name(elem)))
|
||||
has_features = B_TRUE;
|
||||
}
|
||||
|
||||
if (has_features || nvlist_lookup_uint64(props,
|
||||
zpool_prop_to_name(ZPOOL_PROP_VERSION), &version) != 0) {
|
||||
version = SPA_VERSION;
|
||||
ASSERT(version <= SPA_VERSION);
|
||||
}
|
||||
ASSERT(SPA_VERSION_IS_SUPPORTED(version));
|
||||
|
||||
spa->spa_first_txg = txg;
|
||||
spa->spa_uberblock.ub_txg = txg - 1;
|
||||
@ -2949,8 +3345,10 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
spa->spa_l2cache.sav_sync = B_TRUE;
|
||||
}
|
||||
|
||||
spa->spa_is_initializing = B_TRUE;
|
||||
spa->spa_dsl_pool = dp = dsl_pool_create(spa, zplprops, txg);
|
||||
spa->spa_meta_objset = dp->dp_meta_objset;
|
||||
spa->spa_is_initializing = B_FALSE;
|
||||
|
||||
/*
|
||||
* Create DDTs (dedup tables).
|
||||
@ -2974,6 +3372,9 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
||||
cmn_err(CE_PANIC, "failed to add pool config");
|
||||
}
|
||||
|
||||
if (spa_version(spa) >= SPA_VERSION_FEATURES)
|
||||
spa_feature_create_zap_objects(spa, tx);
|
||||
|
||||
if (zap_add(spa->spa_meta_objset,
|
||||
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_CREATION_VERSION,
|
||||
sizeof (uint64_t), 1, &version, tx) != 0) {
|
||||
@ -3164,7 +3565,7 @@ spa_import_rootpool(char *devpath, char *devid)
|
||||
}
|
||||
#endif
|
||||
if (config == NULL) {
|
||||
cmn_err(CE_NOTE, "Can not read the pool label from '%s'",
|
||||
cmn_err(CE_NOTE, "Cannot read the pool label from '%s'",
|
||||
devpath);
|
||||
return (EIO);
|
||||
}
|
||||
@ -3478,6 +3879,8 @@ spa_tryimport(nvlist_t *tryconfig)
|
||||
state) == 0);
|
||||
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
|
||||
spa->spa_uberblock.ub_timestamp) == 0);
|
||||
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_LOAD_INFO,
|
||||
spa->spa_load_info) == 0);
|
||||
|
||||
/*
|
||||
* If the bootfs property exists on this pool then we
|
||||
@ -3816,7 +4219,7 @@ spa_vdev_attach(spa_t *spa, uint64_t guid, nvlist_t *nvroot, int replacing)
|
||||
pvd = oldvd->vdev_parent;
|
||||
|
||||
if ((error = spa_config_parse(spa, &newrootvd, nvroot, NULL, 0,
|
||||
VDEV_ALLOC_ADD)) != 0)
|
||||
VDEV_ALLOC_ATTACH)) != 0)
|
||||
return (spa_vdev_exit(spa, NULL, txg, EINVAL));
|
||||
|
||||
if (newrootvd->vdev_children != 1)
|
||||
@ -5195,7 +5598,7 @@ spa_sync_nvlist(spa_t *spa, uint64_t obj, nvlist_t *nv, dmu_tx_t *tx)
|
||||
* information. This avoids the dbuf_will_dirty() path and
|
||||
* saves us a pre-read to get data we don't actually care about.
|
||||
*/
|
||||
bufsize = P2ROUNDUP(nvsize, SPA_CONFIG_BLOCKSIZE);
|
||||
bufsize = P2ROUNDUP((uint64_t)nvsize, SPA_CONFIG_BLOCKSIZE);
|
||||
packed = kmem_alloc(bufsize, KM_SLEEP);
|
||||
|
||||
VERIFY(nvlist_pack(nv, &packed, &nvsize, NV_ENCODE_XDR,
|
||||
@ -5280,6 +5683,24 @@ spa_sync_config_object(spa_t *spa, dmu_tx_t *tx)
|
||||
spa_sync_nvlist(spa, spa->spa_config_object, config, tx);
|
||||
}
|
||||
|
||||
/*
 * Set the pool version.
 *
 * arg1 is the spa_t; arg2 points to the uint64_t version to set.  The new
 * version is stored in spa_uberblock.ub_version and the root vdev's config
 * is marked dirty.  The (arg1, arg2, tx) signature suggests this runs as a
 * sync task -- NOTE(review): confirm against the caller.
 */
static void
|
||||
spa_sync_version(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
{
|
||||
spa_t *spa = arg1;
|
||||
uint64_t version = *(uint64_t *)arg2;
|
||||
|
||||
/*
|
||||
* Setting the version is special cased when first creating the pool.
|
||||
*/
|
||||
ASSERT(tx->tx_txg != TXG_INITIAL);
|
||||
|
||||
/* The version may only stay the same or move up, never past SPA_VERSION. */
ASSERT(version <= SPA_VERSION);
|
||||
ASSERT(version >= spa_version(spa));
|
||||
|
||||
spa->spa_uberblock.ub_version = version;
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Set zpool properties.
|
||||
*/
|
||||
@ -5289,32 +5710,38 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
spa_t *spa = arg1;
|
||||
objset_t *mos = spa->spa_meta_objset;
|
||||
nvlist_t *nvp = arg2;
|
||||
nvpair_t *elem;
|
||||
uint64_t intval;
|
||||
char *strval;
|
||||
zpool_prop_t prop;
|
||||
const char *propname;
|
||||
zprop_type_t proptype;
|
||||
nvpair_t *elem = NULL;
|
||||
|
||||
mutex_enter(&spa->spa_props_lock);
|
||||
|
||||
elem = NULL;
|
||||
while ((elem = nvlist_next_nvpair(nvp, elem))) {
|
||||
uint64_t intval;
|
||||
char *strval, *fname;
|
||||
zpool_prop_t prop;
|
||||
const char *propname;
|
||||
zprop_type_t proptype;
|
||||
zfeature_info_t *feature;
|
||||
|
||||
switch (prop = zpool_name_to_prop(nvpair_name(elem))) {
|
||||
case ZPOOL_PROP_VERSION:
|
||||
case ZPROP_INVAL:
|
||||
/*
|
||||
* Only set version for non-zpool-creation cases
|
||||
* (set/import). spa_create() needs special care
|
||||
* for version setting.
|
||||
* We checked this earlier in spa_prop_validate().
|
||||
*/
|
||||
if (tx->tx_txg != TXG_INITIAL) {
|
||||
VERIFY(nvpair_value_uint64(elem,
|
||||
&intval) == 0);
|
||||
ASSERT(intval <= SPA_VERSION);
|
||||
ASSERT(intval >= spa_version(spa));
|
||||
spa->spa_uberblock.ub_version = intval;
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
}
|
||||
ASSERT(zpool_prop_feature(nvpair_name(elem)));
|
||||
|
||||
fname = strchr(nvpair_name(elem), '@') + 1;
|
||||
VERIFY3U(0, ==, zfeature_lookup_name(fname, &feature));
|
||||
|
||||
spa_feature_enable(spa, feature, tx);
|
||||
break;
|
||||
|
||||
case ZPOOL_PROP_VERSION:
|
||||
VERIFY(nvpair_value_uint64(elem, &intval) == 0);
|
||||
/*
|
||||
* The version is synced separately before other
|
||||
* properties and should be correct by now.
|
||||
*/
|
||||
ASSERT3U(spa_version(spa), >=, intval);
|
||||
break;
|
||||
|
||||
case ZPOOL_PROP_ALTROOT:
|
||||
@ -5332,19 +5759,29 @@ spa_sync_props(void *arg1, void *arg2, dmu_tx_t *tx)
|
||||
* properties.
|
||||
*/
|
||||
break;
|
||||
case ZPOOL_PROP_COMMENT:
|
||||
VERIFY(nvpair_value_string(elem, &strval) == 0);
|
||||
if (spa->spa_comment != NULL)
|
||||
spa_strfree(spa->spa_comment);
|
||||
spa->spa_comment = spa_strdup(strval);
|
||||
/*
|
||||
* We need to dirty the configuration on all the vdevs
|
||||
* so that their labels get updated. It's unnecessary
|
||||
* to do this for pool creation since the vdev's
|
||||
* configuration has already been dirtied.
|
||||
*/
|
||||
if (tx->tx_txg != TXG_INITIAL)
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
break;
|
||||
default:
|
||||
/*
|
||||
* Set pool property values in the poolprops mos object.
|
||||
*/
|
||||
if (spa->spa_pool_props_object == 0) {
|
||||
VERIFY((spa->spa_pool_props_object =
|
||||
zap_create(mos, DMU_OT_POOL_PROPS,
|
||||
DMU_OT_NONE, 0, tx)) > 0);
|
||||
|
||||
VERIFY(zap_update(mos,
|
||||
spa->spa_pool_props_object =
|
||||
zap_create_link(mos, DMU_OT_POOL_PROPS,
|
||||
DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_PROPS,
|
||||
8, 1, &spa->spa_pool_props_object, tx)
|
||||
== 0);
|
||||
tx);
|
||||
}
|
||||
|
||||
/* normalize the property name */
|
||||
@ -5443,6 +5880,11 @@ spa_sync_upgrades(spa_t *spa, dmu_tx_t *tx)
|
||||
/* Keeping the freedir open increases spa_minref */
|
||||
spa->spa_minref += 3;
|
||||
}
|
||||
|
||||
if (spa->spa_ubsync.ub_version < SPA_VERSION_FEATURES &&
|
||||
spa->spa_uberblock.ub_version >= SPA_VERSION_FEATURES) {
|
||||
spa_feature_create_zap_objects(spa, tx);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/spa.h>
|
||||
@ -33,6 +35,7 @@
|
||||
#include <sys/utsname.h>
|
||||
#include <sys/systeminfo.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/zfeature.h>
|
||||
#ifdef _KERNEL
|
||||
#include <sys/kobj.h>
|
||||
#include <sys/zone.h>
|
||||
@ -345,6 +348,10 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
|
||||
txg) == 0);
|
||||
VERIFY(nvlist_add_uint64(config, ZPOOL_CONFIG_POOL_GUID,
|
||||
spa_guid(spa)) == 0);
|
||||
VERIFY(spa->spa_comment == NULL || nvlist_add_string(config,
|
||||
ZPOOL_CONFIG_COMMENT, spa->spa_comment) == 0);
|
||||
|
||||
|
||||
#ifdef _KERNEL
|
||||
hostid = zone_get_hostid(NULL);
|
||||
#else /* _KERNEL */
|
||||
@ -403,6 +410,12 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
|
||||
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, nvroot) == 0);
|
||||
nvlist_free(nvroot);
|
||||
|
||||
/*
|
||||
* Store what's necessary for reading the MOS in the label.
|
||||
*/
|
||||
VERIFY(nvlist_add_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
|
||||
spa->spa_label_features) == 0);
|
||||
|
||||
if (getstats && spa_load_state(spa) == SPA_LOAD_NONE) {
|
||||
ddt_histogram_t *ddh;
|
||||
ddt_stat_t *dds;
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/spa.h>
|
||||
@ -101,11 +102,11 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
|
||||
|
||||
/*
|
||||
* Figure out maximum size of history log. We set it at
|
||||
* 1% of pool size, with a max of 32MB and min of 128KB.
|
||||
* 0.1% of pool size, with a max of 1G and min of 128KB.
|
||||
*/
|
||||
shpp->sh_phys_max_off =
|
||||
metaslab_class_get_dspace(spa_normal_class(spa)) / 100;
|
||||
shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 32<<20);
|
||||
metaslab_class_get_dspace(spa_normal_class(spa)) / 1000;
|
||||
shpp->sh_phys_max_off = MIN(shpp->sh_phys_max_off, 1<<30);
|
||||
shpp->sh_phys_max_off = MAX(shpp->sh_phys_max_off, 128<<10);
|
||||
|
||||
dmu_buf_rele(dbp, FTAG);
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -46,6 +48,7 @@
|
||||
#include <sys/arc.h>
|
||||
#include <sys/ddt.h>
|
||||
#include "zfs_prop.h"
|
||||
#include "zfeature_common.h"
|
||||
|
||||
/*
|
||||
* SPA locking
|
||||
@ -214,7 +217,7 @@
|
||||
* Like spa_vdev_enter/exit, these are convenience wrappers -- the actual
|
||||
* locking is, always, based on spa_namespace_lock and spa_config_lock[].
|
||||
*
|
||||
* spa_rename() is also implemented within this file since is requires
|
||||
* spa_rename() is also implemented within this file since it requires
|
||||
* manipulation of the namespace.
|
||||
*/
|
||||
|
||||
@ -481,8 +484,22 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
||||
VERIFY(nvlist_alloc(&spa->spa_load_info, NV_UNIQUE_NAME,
|
||||
KM_SLEEP) == 0);
|
||||
|
||||
if (config != NULL)
|
||||
if (config != NULL) {
|
||||
nvlist_t *features;
|
||||
|
||||
if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_FEATURES_FOR_READ,
|
||||
&features) == 0) {
|
||||
VERIFY(nvlist_dup(features, &spa->spa_label_features,
|
||||
0) == 0);
|
||||
}
|
||||
|
||||
VERIFY(nvlist_dup(config, &spa->spa_config, 0) == 0);
|
||||
}
|
||||
|
||||
if (spa->spa_label_features == NULL) {
|
||||
VERIFY(nvlist_alloc(&spa->spa_label_features, NV_UNIQUE_NAME,
|
||||
KM_SLEEP) == 0);
|
||||
}
|
||||
|
||||
return (spa);
|
||||
}
|
||||
@ -519,6 +536,7 @@ spa_remove(spa_t *spa)
|
||||
|
||||
list_destroy(&spa->spa_config_list);
|
||||
|
||||
nvlist_free(spa->spa_label_features);
|
||||
nvlist_free(spa->spa_load_info);
|
||||
spa_config_set(spa, NULL);
|
||||
|
||||
@ -1027,6 +1045,20 @@ spa_vdev_state_exit(spa_t *spa, vdev_t *vd, int error)
|
||||
* ==========================================================================
|
||||
*/
|
||||
|
||||
/*
 * Add 'feature' as a boolean entry to spa_label_features and mark the root
 * vdev's config dirty.  The return value of nvlist_add_boolean() is
 * deliberately ignored.
 */
void
|
||||
spa_activate_mos_feature(spa_t *spa, const char *feature)
|
||||
{
|
||||
(void) nvlist_add_boolean(spa->spa_label_features, feature);
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
}
|
||||
|
||||
/*
 * Remove every entry named 'feature' from spa_label_features and mark the
 * root vdev's config dirty.  The return value of nvlist_remove_all() is
 * deliberately ignored.
 */
void
|
||||
spa_deactivate_mos_feature(spa_t *spa, const char *feature)
|
||||
{
|
||||
(void) nvlist_remove_all(spa->spa_label_features, feature);
|
||||
vdev_config_dirty(spa->spa_root_vdev);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rename a spa_t.
|
||||
*/
|
||||
@ -1177,12 +1209,22 @@ spa_generate_guid(spa_t *spa)
|
||||
void
|
||||
sprintf_blkptr(char *buf, const blkptr_t *bp)
|
||||
{
|
||||
char *type = NULL;
|
||||
char type[256];
|
||||
char *checksum = NULL;
|
||||
char *compress = NULL;
|
||||
|
||||
if (bp != NULL) {
|
||||
type = dmu_ot[BP_GET_TYPE(bp)].ot_name;
|
||||
if (BP_GET_TYPE(bp) & DMU_OT_NEWTYPE) {
|
||||
dmu_object_byteswap_t bswap =
|
||||
DMU_OT_BYTESWAP(BP_GET_TYPE(bp));
|
||||
(void) snprintf(type, sizeof (type), "bswap %s %s",
|
||||
DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) ?
|
||||
"metadata" : "data",
|
||||
dmu_ot_byteswap[bswap].ob_name);
|
||||
} else {
|
||||
(void) strlcpy(type, dmu_ot[BP_GET_TYPE(bp)].ot_name,
|
||||
sizeof (type));
|
||||
}
|
||||
checksum = zio_checksum_table[BP_GET_CHECKSUM(bp)].ci_name;
|
||||
compress = zio_compress_table[BP_GET_COMPRESS(bp)].ci_name;
|
||||
}
|
||||
@ -1264,6 +1306,12 @@ spa_get_dsl(spa_t *spa)
|
||||
return (spa->spa_dsl_pool);
|
||||
}
|
||||
|
||||
/*
 * Accessor: return the current value of the spa_is_initializing flag.
 */
boolean_t
|
||||
spa_is_initializing(spa_t *spa)
|
||||
{
|
||||
return (spa->spa_is_initializing);
|
||||
}
|
||||
|
||||
blkptr_t *
|
||||
spa_get_rootblkptr(spa_t *spa)
|
||||
{
|
||||
@ -1303,13 +1351,24 @@ spa_guid(spa_t *spa)
|
||||
/*
|
||||
* If we fail to parse the config during spa_load(), we can go through
|
||||
* the error path (which posts an ereport) and end up here with no root
|
||||
* vdev. We stash the original pool guid in 'spa_load_guid' to handle
|
||||
* vdev. We stash the original pool guid in 'spa_config_guid' to handle
|
||||
* this case.
|
||||
*/
|
||||
if (spa->spa_root_vdev != NULL)
|
||||
return (spa->spa_root_vdev->vdev_guid);
|
||||
else
|
||||
return (spa->spa_load_guid);
|
||||
return (spa->spa_config_guid);
|
||||
}
|
||||
|
||||
/*
 * Accessor: return the pool's load-time GUID (spa_load_guid).
 */
uint64_t
|
||||
spa_load_guid(spa_t *spa)
|
||||
{
|
||||
/*
|
||||
* This is a GUID that exists solely as a reference for the
|
||||
* purposes of the arc. It is generated at load time, and
|
||||
* is never written to persistent storage.
|
||||
*/
|
||||
return (spa->spa_load_guid);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
@ -1536,6 +1595,7 @@ spa_init(int mode)
|
||||
vdev_cache_stat_init();
|
||||
zfs_prop_init();
|
||||
zpool_prop_init();
|
||||
zpool_feature_init();
|
||||
spa_config_load();
|
||||
l2arc_start();
|
||||
}
|
||||
@ -1670,3 +1730,9 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Accessor: return the current value of the spa_debug flag.
 */
boolean_t
|
||||
spa_debug_enabled(spa_t *spa)
|
||||
{
|
||||
return (spa->spa_debug);
|
||||
}
|
||||
|
64
uts/common/fs/zfs/sys/bptree.h
Normal file
64
uts/common/fs/zfs/sys/bptree.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_BPTREE_H
|
||||
#define _SYS_BPTREE_H
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct bptree_phys {
|
||||
uint64_t bt_begin;
|
||||
uint64_t bt_end;
|
||||
uint64_t bt_bytes;
|
||||
uint64_t bt_comp;
|
||||
uint64_t bt_uncomp;
|
||||
} bptree_phys_t;
|
||||
|
||||
typedef struct bptree_entry_phys {
|
||||
blkptr_t be_bp;
|
||||
uint64_t be_birth_txg; /* only delete blocks born after this txg */
|
||||
zbookmark_t be_zb; /* holds traversal resume point if needed */
|
||||
} bptree_entry_phys_t;
|
||||
|
||||
typedef int bptree_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
uint64_t bptree_alloc(objset_t *os, dmu_tx_t *tx);
|
||||
int bptree_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
|
||||
|
||||
void bptree_add(objset_t *os, uint64_t obj, blkptr_t *bp, uint64_t birth_txg,
|
||||
uint64_t bytes, uint64_t comp, uint64_t uncomp, dmu_tx_t *tx);
|
||||
|
||||
int bptree_iterate(objset_t *os, uint64_t obj, boolean_t free,
|
||||
bptree_itor_t func, void *arg, dmu_tx_t *tx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_BPTREE_H */
|
@ -18,8 +18,12 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
@ -70,6 +74,53 @@ typedef struct objset objset_t;
|
||||
typedef struct dmu_tx dmu_tx_t;
|
||||
typedef struct dsl_dir dsl_dir_t;
|
||||
|
||||
typedef enum dmu_object_byteswap {
|
||||
DMU_BSWAP_UINT8,
|
||||
DMU_BSWAP_UINT16,
|
||||
DMU_BSWAP_UINT32,
|
||||
DMU_BSWAP_UINT64,
|
||||
DMU_BSWAP_ZAP,
|
||||
DMU_BSWAP_DNODE,
|
||||
DMU_BSWAP_OBJSET,
|
||||
DMU_BSWAP_ZNODE,
|
||||
DMU_BSWAP_OLDACL,
|
||||
DMU_BSWAP_ACL,
|
||||
/*
|
||||
* Allocating a new byteswap type number makes the on-disk format
|
||||
* incompatible with any other format that uses the same number.
|
||||
*
|
||||
* Data can usually be structured to work with one of the
|
||||
* DMU_BSWAP_UINT* or DMU_BSWAP_ZAP types.
|
||||
*/
|
||||
DMU_BSWAP_NUMFUNCS
|
||||
} dmu_object_byteswap_t;
|
||||
|
||||
#define DMU_OT_NEWTYPE 0x80
|
||||
#define DMU_OT_METADATA 0x40
|
||||
#define DMU_OT_BYTESWAP_MASK 0x3f
|
||||
|
||||
/*
|
||||
* Defines a uint8_t object type. Object types specify if the data
|
||||
* in the object is metadata (boolean) and how to byteswap the data
|
||||
* (dmu_object_byteswap_t).
|
||||
*/
|
||||
#define DMU_OT(byteswap, metadata) \
|
||||
(DMU_OT_NEWTYPE | \
|
||||
((metadata) ? DMU_OT_METADATA : 0) | \
|
||||
((byteswap) & DMU_OT_BYTESWAP_MASK))
|
||||
|
||||
#define DMU_OT_IS_VALID(ot) (((ot) & DMU_OT_NEWTYPE) ? \
|
||||
((ot) & DMU_OT_BYTESWAP_MASK) < DMU_BSWAP_NUMFUNCS : \
|
||||
(ot) < DMU_OT_NUMTYPES)
|
||||
|
||||
#define DMU_OT_IS_METADATA(ot) (((ot) & DMU_OT_NEWTYPE) ? \
|
||||
((ot) & DMU_OT_METADATA) : \
|
||||
dmu_ot[(ot)].ot_metadata)
|
||||
|
||||
#define DMU_OT_BYTESWAP(ot) (((ot) & DMU_OT_NEWTYPE) ? \
|
||||
((ot) & DMU_OT_BYTESWAP_MASK) : \
|
||||
dmu_ot[(ot)].ot_byteswap)
|
||||
|
||||
typedef enum dmu_object_type {
|
||||
DMU_OT_NONE,
|
||||
/* general: */
|
||||
@ -134,7 +185,35 @@ typedef enum dmu_object_type {
|
||||
DMU_OT_DEADLIST_HDR, /* UINT64 */
|
||||
DMU_OT_DSL_CLONES, /* ZAP */
|
||||
DMU_OT_BPOBJ_SUBOBJ, /* UINT64 */
|
||||
DMU_OT_NUMTYPES
|
||||
/*
|
||||
* Do not allocate new object types here. Doing so makes the on-disk
|
||||
* format incompatible with any other format that uses the same object
|
||||
* type number.
|
||||
*
|
||||
* When creating an object which does not have one of the above types
|
||||
* use the DMU_OTN_* type with the correct byteswap and metadata
|
||||
* values.
|
||||
*
|
||||
* The DMU_OTN_* types do not have entries in the dmu_ot table,
|
||||
* use the DMU_OT_IS_METADATA() and DMU_OT_BYTESWAP() macros instead
|
||||
* of indexing into dmu_ot directly (this works for both DMU_OT_* types
|
||||
* and DMU_OTN_* types).
|
||||
*/
|
||||
DMU_OT_NUMTYPES,
|
||||
|
||||
/*
|
||||
* Names for valid types declared with DMU_OT().
|
||||
*/
|
||||
DMU_OTN_UINT8_DATA = DMU_OT(DMU_BSWAP_UINT8, B_FALSE),
|
||||
DMU_OTN_UINT8_METADATA = DMU_OT(DMU_BSWAP_UINT8, B_TRUE),
|
||||
DMU_OTN_UINT16_DATA = DMU_OT(DMU_BSWAP_UINT16, B_FALSE),
|
||||
DMU_OTN_UINT16_METADATA = DMU_OT(DMU_BSWAP_UINT16, B_TRUE),
|
||||
DMU_OTN_UINT32_DATA = DMU_OT(DMU_BSWAP_UINT32, B_FALSE),
|
||||
DMU_OTN_UINT32_METADATA = DMU_OT(DMU_BSWAP_UINT32, B_TRUE),
|
||||
DMU_OTN_UINT64_DATA = DMU_OT(DMU_BSWAP_UINT64, B_FALSE),
|
||||
DMU_OTN_UINT64_METADATA = DMU_OT(DMU_BSWAP_UINT64, B_TRUE),
|
||||
DMU_OTN_ZAP_DATA = DMU_OT(DMU_BSWAP_ZAP, B_FALSE),
|
||||
DMU_OTN_ZAP_METADATA = DMU_OT(DMU_BSWAP_ZAP, B_TRUE),
|
||||
} dmu_object_type_t;
|
||||
|
||||
typedef enum dmu_objset_type {
|
||||
@ -191,7 +270,7 @@ int dmu_objset_create(const char *name, dmu_objset_type_t type, uint64_t flags,
|
||||
int dmu_objset_clone(const char *name, struct dsl_dataset *clone_origin,
|
||||
uint64_t flags);
|
||||
int dmu_objset_destroy(const char *name, boolean_t defer);
|
||||
int dmu_snapshots_destroy(char *fsname, char *snapname, boolean_t defer);
|
||||
int dmu_snapshots_destroy_nvl(struct nvlist *snaps, boolean_t defer, char *);
|
||||
int dmu_objset_snapshot(char *fsname, char *snapname, char *tag,
|
||||
struct nvlist *props, boolean_t recursive, boolean_t temporary, int fd);
|
||||
int dmu_objset_rename(const char *name, const char *newname,
|
||||
@ -214,6 +293,9 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
*/
|
||||
#define DMU_POOL_DIRECTORY_OBJECT 1
|
||||
#define DMU_POOL_CONFIG "config"
|
||||
#define DMU_POOL_FEATURES_FOR_WRITE "features_for_write"
|
||||
#define DMU_POOL_FEATURES_FOR_READ "features_for_read"
|
||||
#define DMU_POOL_FEATURE_DESCRIPTIONS "feature_descriptions"
|
||||
#define DMU_POOL_ROOT_DATASET "root_dataset"
|
||||
#define DMU_POOL_SYNC_BPOBJ "sync_bplist"
|
||||
#define DMU_POOL_ERRLOG_SCRUB "errlog_scrub"
|
||||
@ -229,6 +311,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
|
||||
#define DMU_POOL_CREATION_VERSION "creation_version"
|
||||
#define DMU_POOL_SCAN "scan"
|
||||
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
|
||||
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
|
||||
|
||||
/*
|
||||
* Allocate an object from this objset. The range of object numbers
|
||||
@ -489,7 +572,7 @@ void dmu_tx_callback_register(dmu_tx_t *tx, dmu_tx_callback_func_t *dcb_func,
|
||||
|
||||
/*
|
||||
* Free up the data blocks for a defined range of a file. If size is
|
||||
* zero, the range from offset to end-of-file is freed.
|
||||
* -1, the range from offset to end-of-file is freed.
|
||||
*/
|
||||
int dmu_free_range(objset_t *os, uint64_t object, uint64_t offset,
|
||||
uint64_t size, dmu_tx_t *tx);
|
||||
@ -559,12 +642,18 @@ typedef struct dmu_object_info {
|
||||
typedef void arc_byteswap_func_t(void *buf, size_t size);
|
||||
|
||||
typedef struct dmu_object_type_info {
|
||||
arc_byteswap_func_t *ot_byteswap;
|
||||
dmu_object_byteswap_t ot_byteswap;
|
||||
boolean_t ot_metadata;
|
||||
char *ot_name;
|
||||
} dmu_object_type_info_t;
|
||||
|
||||
typedef struct dmu_object_byteswap_info {
|
||||
arc_byteswap_func_t *ob_func;
|
||||
char *ob_name;
|
||||
} dmu_object_byteswap_info_t;
|
||||
|
||||
extern const dmu_object_type_info_t dmu_ot[DMU_OT_NUMTYPES];
|
||||
extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
|
||||
|
||||
/*
|
||||
* Get information on a DMU object.
|
||||
@ -700,8 +789,10 @@ typedef void (*dmu_traverse_cb_t)(objset_t *os, void *arg, struct blkptr *bp,
|
||||
void dmu_traverse_objset(objset_t *os, uint64_t txg_start,
|
||||
dmu_traverse_cb_t cb, void *arg);
|
||||
|
||||
int dmu_sendbackup(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
struct vnode *vp, offset_t *off);
|
||||
int dmu_send(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorigin,
|
||||
int outfd, struct vnode *vp, offset_t *off);
|
||||
int dmu_send_estimate(objset_t *tosnap, objset_t *fromsnap, boolean_t fromorign,
|
||||
uint64_t *sizep);
|
||||
|
||||
typedef struct dmu_recv_cookie {
|
||||
/*
|
||||
@ -718,6 +809,7 @@ typedef struct dmu_recv_cookie {
|
||||
char *drc_top_ds;
|
||||
boolean_t drc_newfs;
|
||||
boolean_t drc_force;
|
||||
struct avl_tree *drc_guid_to_ds_map;
|
||||
} dmu_recv_cookie_t;
|
||||
|
||||
int dmu_recv_begin(char *tofs, char *tosnap, char *topds, struct drr_begin *,
|
||||
|
@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright 2010 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_IMPL_H
|
||||
@ -30,6 +31,7 @@
|
||||
#include <sys/zio.h>
|
||||
#include <sys/dnode.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_ioctl.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -264,6 +266,32 @@ static xuio_stats_t xuio_stats = {
|
||||
atomic_add_64(&xuio_stats.stat.value.ui64, (val))
|
||||
#define XUIOSTAT_BUMP(stat) XUIOSTAT_INCR(stat, 1)
|
||||
|
||||
/*
|
||||
* The list of data whose inclusion in a send stream can be pending from
|
||||
* one call to backup_cb to another. Multiple calls to dump_free() and
|
||||
* dump_freeobjects() can be aggregated into a single DRR_FREE or
|
||||
* DRR_FREEOBJECTS replay record.
|
||||
*/
|
||||
typedef enum {
|
||||
PENDING_NONE,
|
||||
PENDING_FREE,
|
||||
PENDING_FREEOBJECTS
|
||||
} dmu_pendop_t;
|
||||
|
||||
typedef struct dmu_sendarg {
|
||||
list_node_t dsa_link;
|
||||
dmu_replay_record_t *dsa_drr;
|
||||
vnode_t *dsa_vp;
|
||||
int dsa_outfd;
|
||||
struct proc *dsa_proc;
|
||||
offset_t *dsa_off;
|
||||
objset_t *dsa_os;
|
||||
zio_cksum_t dsa_zc;
|
||||
uint64_t dsa_toguid;
|
||||
int dsa_err;
|
||||
dmu_pendop_t dsa_pending_op;
|
||||
} dmu_sendarg_t;
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DMU_TRAVERSE_H
|
||||
@ -54,6 +55,9 @@ typedef int (blkptr_cb_t)(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
|
||||
int traverse_dataset(struct dsl_dataset *ds,
|
||||
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
|
||||
int traverse_dataset_destroyed(spa_t *spa, blkptr_t *blkptr,
|
||||
uint64_t txg_start, zbookmark_t *resume, int flags,
|
||||
blkptr_cb_t func, void *arg);
|
||||
int traverse_pool(spa_t *spa,
|
||||
uint64_t txg_start, int flags, blkptr_cb_t func, void *arg);
|
||||
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DATASET_H
|
||||
@ -84,7 +86,12 @@ typedef struct dsl_dataset_phys {
|
||||
uint64_t ds_creation_time; /* seconds since 1970 */
|
||||
uint64_t ds_creation_txg;
|
||||
uint64_t ds_deadlist_obj; /* DMU_OT_DEADLIST */
|
||||
uint64_t ds_used_bytes;
|
||||
/*
|
||||
* ds_referenced_bytes, ds_compressed_bytes, and ds_uncompressed_bytes
|
||||
* include all blocks referenced by this dataset, including those
|
||||
* shared with any other datasets.
|
||||
*/
|
||||
uint64_t ds_referenced_bytes;
|
||||
uint64_t ds_compressed_bytes;
|
||||
uint64_t ds_uncompressed_bytes;
|
||||
uint64_t ds_unique_bytes; /* only relevant to snapshots */
|
||||
@ -149,6 +156,9 @@ typedef struct dsl_dataset {
|
||||
uint64_t ds_reserved; /* cached refreservation */
|
||||
uint64_t ds_quota; /* cached refquota */
|
||||
|
||||
kmutex_t ds_sendstream_lock;
|
||||
list_t ds_sendstreams;
|
||||
|
||||
/* Protected by ds_lock; keep at end of struct for better locality */
|
||||
char ds_snapname[MAXNAMELEN];
|
||||
} dsl_dataset_t;
|
||||
@ -249,6 +259,10 @@ void dsl_dataset_space(dsl_dataset_t *ds,
|
||||
uint64_t *refdbytesp, uint64_t *availbytesp,
|
||||
uint64_t *usedobjsp, uint64_t *availobjsp);
|
||||
uint64_t dsl_dataset_fsid_guid(dsl_dataset_t *ds);
|
||||
int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
|
||||
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_DELEG_H
|
||||
@ -64,7 +65,8 @@ extern "C" {
|
||||
int dsl_deleg_get(const char *ddname, nvlist_t **nvp);
|
||||
int dsl_deleg_set(const char *ddname, nvlist_t *nvp, boolean_t unset);
|
||||
int dsl_deleg_access(const char *ddname, const char *perm, cred_t *cr);
|
||||
int dsl_deleg_access_impl(struct dsl_dataset *ds, const char *perm, cred_t *cr);
|
||||
int dsl_deleg_access_impl(struct dsl_dataset *ds, boolean_t descendent,
|
||||
const char *perm, cred_t *cr);
|
||||
void dsl_deleg_set_create_perms(dsl_dir_t *dd, dmu_tx_t *tx, cred_t *cr);
|
||||
int dsl_deleg_can_allow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
int dsl_deleg_can_unallow(char *ddname, nvlist_t *nvp, cred_t *cr);
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_POOL_H
|
||||
@ -34,6 +35,7 @@
|
||||
#include <sys/ddt.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/bpobj.h>
|
||||
#include <sys/bptree.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -48,7 +50,8 @@ struct dsl_scan;
|
||||
|
||||
/* These macros are for indexing into the zfs_all_blkstats_t. */
|
||||
#define DMU_OT_DEFERRED DMU_OT_NONE
|
||||
#define DMU_OT_TOTAL DMU_OT_NUMTYPES
|
||||
#define DMU_OT_OTHER DMU_OT_NUMTYPES /* place holder for DMU_OT() types */
|
||||
#define DMU_OT_TOTAL (DMU_OT_NUMTYPES + 1)
|
||||
|
||||
typedef struct zfs_blkstat {
|
||||
uint64_t zb_count;
|
||||
@ -85,6 +88,7 @@ typedef struct dsl_pool {
|
||||
uint64_t dp_write_limit;
|
||||
uint64_t dp_tmp_userrefs_obj;
|
||||
bpobj_t dp_free_bpobj;
|
||||
uint64_t dp_bptree_obj;
|
||||
|
||||
struct dsl_scan *dp_scan;
|
||||
|
||||
@ -110,7 +114,8 @@ typedef struct dsl_pool {
|
||||
zfs_all_blkstats_t *dp_blkstats;
|
||||
} dsl_pool_t;
|
||||
|
||||
int dsl_pool_open(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
|
||||
int dsl_pool_init(spa_t *spa, uint64_t txg, dsl_pool_t **dpp);
|
||||
int dsl_pool_open(dsl_pool_t *dp);
|
||||
void dsl_pool_close(dsl_pool_t *dp);
|
||||
dsl_pool_t *dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg);
|
||||
void dsl_pool_sync(dsl_pool_t *dp, uint64_t txg);
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DSL_SCAN_H
|
||||
@ -79,6 +80,9 @@ typedef struct dsl_scan {
|
||||
uint64_t scn_sync_start_time;
|
||||
zio_t *scn_zio_root;
|
||||
|
||||
/* for freeing blocks */
|
||||
boolean_t scn_is_bptree;
|
||||
|
||||
/* for debugging / information */
|
||||
uint64_t scn_visited_this_txg;
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_H
|
||||
@ -47,6 +48,8 @@ extern void metaslab_sync_reassess(metaslab_group_t *mg);
|
||||
#define METASLAB_HINTBP_FAVOR 0x0
|
||||
#define METASLAB_HINTBP_AVOID 0x1
|
||||
#define METASLAB_GANG_HEADER 0x2
|
||||
#define METASLAB_GANG_CHILD 0x4
|
||||
#define METASLAB_GANG_AVOID 0x8
|
||||
|
||||
extern int metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
|
||||
blkptr_t *bp, int ncopies, uint64_t txg, blkptr_t *hintbp, int flags);
|
||||
|
@ -21,6 +21,7 @@
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
@ -52,6 +53,7 @@ struct metaslab_group {
|
||||
avl_tree_t mg_metaslab_tree;
|
||||
uint64_t mg_aliquot;
|
||||
uint64_t mg_bonus_area;
|
||||
uint64_t mg_alloc_failures;
|
||||
int64_t mg_bias;
|
||||
int64_t mg_activation_count;
|
||||
metaslab_class_t *mg_class;
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_H
|
||||
@ -92,7 +94,7 @@ struct dsl_pool;
|
||||
/*
|
||||
* Size of block to hold the configuration data (a packed nvlist)
|
||||
*/
|
||||
#define SPA_CONFIG_BLOCKSIZE (1 << 14)
|
||||
#define SPA_CONFIG_BLOCKSIZE (1ULL << 14)
|
||||
|
||||
/*
|
||||
* The DVA size encodings for LSIZE and PSIZE support blocks up to 32MB.
|
||||
@ -260,7 +262,7 @@ typedef struct blkptr {
|
||||
DVA_GET_ASIZE(&(bp)->blk_dva[2]))
|
||||
|
||||
#define BP_GET_UCSIZE(bp) \
|
||||
((BP_GET_LEVEL(bp) > 0 || dmu_ot[BP_GET_TYPE(bp)].ot_metadata) ? \
|
||||
((BP_GET_LEVEL(bp) > 0 || DMU_OT_IS_METADATA(BP_GET_TYPE(bp))) ? \
|
||||
BP_GET_PSIZE(bp) : BP_GET_LSIZE(bp))
|
||||
|
||||
#define BP_GET_NDVAS(bp) \
|
||||
@ -401,8 +403,8 @@ typedef struct blkptr {
|
||||
#include <sys/dmu.h>
|
||||
|
||||
#define BP_GET_BUFC_TYPE(bp) \
|
||||
(((BP_GET_LEVEL(bp) > 0) || (dmu_ot[BP_GET_TYPE(bp)].ot_metadata)) ? \
|
||||
ARC_BUFC_METADATA : ARC_BUFC_DATA);
|
||||
(((BP_GET_LEVEL(bp) > 0) || (DMU_OT_IS_METADATA(BP_GET_TYPE(bp)))) ? \
|
||||
ARC_BUFC_METADATA : ARC_BUFC_DATA)
|
||||
|
||||
typedef enum spa_import_type {
|
||||
SPA_IMPORT_EXISTING,
|
||||
@ -413,8 +415,8 @@ typedef enum spa_import_type {
|
||||
extern int spa_open(const char *pool, spa_t **, void *tag);
|
||||
extern int spa_open_rewind(const char *pool, spa_t **, void *tag,
|
||||
nvlist_t *policy, nvlist_t **config);
|
||||
extern int spa_get_stats(const char *pool, nvlist_t **config,
|
||||
char *altroot, size_t buflen);
|
||||
extern int spa_get_stats(const char *pool, nvlist_t **config, char *altroot,
|
||||
size_t buflen);
|
||||
extern int spa_create(const char *pool, nvlist_t *config, nvlist_t *props,
|
||||
const char *history_str, nvlist_t *zplprops);
|
||||
extern int spa_import_rootpool(char *devpath, char *devid);
|
||||
@ -571,12 +573,14 @@ extern void spa_claim_notify(zio_t *zio);
|
||||
/* Accessor functions */
|
||||
extern boolean_t spa_shutting_down(spa_t *spa);
|
||||
extern struct dsl_pool *spa_get_dsl(spa_t *spa);
|
||||
extern boolean_t spa_is_initializing(spa_t *spa);
|
||||
extern blkptr_t *spa_get_rootblkptr(spa_t *spa);
|
||||
extern void spa_set_rootblkptr(spa_t *spa, const blkptr_t *bp);
|
||||
extern void spa_altroot(spa_t *, char *, size_t);
|
||||
extern int spa_sync_pass(spa_t *spa);
|
||||
extern char *spa_name(spa_t *spa);
|
||||
extern uint64_t spa_guid(spa_t *spa);
|
||||
extern uint64_t spa_load_guid(spa_t *spa);
|
||||
extern uint64_t spa_last_synced_txg(spa_t *spa);
|
||||
extern uint64_t spa_first_txg(spa_t *spa);
|
||||
extern uint64_t spa_syncing_txg(spa_t *spa);
|
||||
@ -601,6 +605,8 @@ extern uint64_t spa_delegation(spa_t *spa);
|
||||
extern objset_t *spa_meta_objset(spa_t *spa);
|
||||
|
||||
/* Miscellaneous support routines */
|
||||
extern void spa_activate_mos_feature(spa_t *spa, const char *feature);
|
||||
extern void spa_deactivate_mos_feature(spa_t *spa, const char *feature);
|
||||
extern int spa_rename(const char *oldname, const char *newname);
|
||||
extern spa_t *spa_by_guid(uint64_t pool_guid, uint64_t device_guid);
|
||||
extern boolean_t spa_guid_exists(uint64_t pool_guid, uint64_t device_guid);
|
||||
@ -610,6 +616,7 @@ extern uint64_t spa_get_random(uint64_t range);
|
||||
extern uint64_t spa_generate_guid(spa_t *spa);
|
||||
extern void sprintf_blkptr(char *buf, const blkptr_t *bp);
|
||||
extern void spa_freeze(spa_t *spa);
|
||||
extern int spa_change_guid(spa_t *spa);
|
||||
extern void spa_upgrade(spa_t *spa, uint64_t version);
|
||||
extern void spa_evict_all(void);
|
||||
extern vdev_t *spa_lookup_by_guid(spa_t *spa, uint64_t guid,
|
||||
@ -697,6 +704,13 @@ _NOTE(CONSTCOND) } while (0)
|
||||
#define dprintf_bp(bp, fmt, ...)
|
||||
#endif
|
||||
|
||||
extern boolean_t spa_debug_enabled(spa_t *spa);
|
||||
/*
 * Log a debug message via zfs_dbgmsg(), but only when spa_debug_enabled()
 * returns true for the given pool.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: a bare { } block followed by the caller's ';' would
 * terminate an enclosing unbraced 'if' and detach any following 'else'.
 * Callers must end the invocation with a semicolon.
 */
#define	spa_dbgmsg(spa, ...)			\
do {						\
	if (spa_debug_enabled(spa))		\
		zfs_dbgmsg(__VA_ARGS__);	\
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
extern int spa_mode_global; /* mode, e.g. FREAD | FWRITE */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPA_IMPL_H
|
||||
@ -110,6 +112,7 @@ struct spa {
|
||||
* Fields protected by spa_namespace_lock.
|
||||
*/
|
||||
char spa_name[MAXNAMELEN]; /* pool name */
|
||||
char *spa_comment; /* comment */
|
||||
avl_node_t spa_avl; /* node in spa_namespace_avl */
|
||||
nvlist_t *spa_config; /* last synced config */
|
||||
nvlist_t *spa_config_syncing; /* currently syncing config */
|
||||
@ -124,6 +127,7 @@ struct spa {
|
||||
uint64_t spa_import_flags; /* import specific flags */
|
||||
taskq_t *spa_zio_taskq[ZIO_TYPES][ZIO_TASKQ_TYPES];
|
||||
dsl_pool_t *spa_dsl_pool;
|
||||
boolean_t spa_is_initializing; /* true while opening pool */
|
||||
metaslab_class_t *spa_normal_class; /* normal data class */
|
||||
metaslab_class_t *spa_log_class; /* intent log data class */
|
||||
uint64_t spa_first_txg; /* first txg after spa_open() */
|
||||
@ -135,11 +139,13 @@ struct spa {
|
||||
objset_t *spa_meta_objset; /* copy of dp->dp_meta_objset */
|
||||
txg_list_t spa_vdev_txg_list; /* per-txg dirty vdev list */
|
||||
vdev_t *spa_root_vdev; /* top-level vdev container */
|
||||
uint64_t spa_load_guid; /* initial guid for spa_load */
|
||||
uint64_t spa_config_guid; /* config pool guid */
|
||||
uint64_t spa_load_guid; /* spa_load initialized guid */
|
||||
list_t spa_config_dirty_list; /* vdevs with dirty config */
|
||||
list_t spa_state_dirty_list; /* vdevs with dirty state */
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
nvlist_t *spa_label_features; /* Features for reading MOS */
|
||||
uint64_t spa_config_object; /* MOS object for pool config */
|
||||
uint64_t spa_config_generation; /* config generation number */
|
||||
uint64_t spa_syncing_txg; /* txg currently syncing */
|
||||
@ -196,6 +202,7 @@ struct spa {
|
||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||
uint8_t spa_suspended; /* pool is suspended */
|
||||
uint8_t spa_claiming; /* pool is doing zil_claim() */
|
||||
boolean_t spa_debug; /* debug enabled? */
|
||||
boolean_t spa_is_root; /* pool is root */
|
||||
int spa_minref; /* num refs when first opened */
|
||||
int spa_mode; /* FREAD | FWRITE */
|
||||
@ -215,7 +222,10 @@ struct spa {
|
||||
boolean_t spa_autoreplace; /* autoreplace set in open */
|
||||
int spa_vdev_locks; /* locks grabbed */
|
||||
uint64_t spa_creation_version; /* version at pool creation */
|
||||
uint64_t spa_prev_software_version;
|
||||
uint64_t spa_prev_software_version; /* See ub_software_version */
|
||||
uint64_t spa_feat_for_write_obj; /* required to write to pool */
|
||||
uint64_t spa_feat_for_read_obj; /* required to read from pool */
|
||||
uint64_t spa_feat_desc_obj; /* Feature descriptions */
|
||||
/*
|
||||
* spa_refcnt & spa_config_lock must be the last elements
|
||||
* because refcount_t changes size based on compilation options.
|
||||
|
@ -18,8 +18,10 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_H
|
||||
@ -48,7 +50,7 @@ extern boolean_t zfs_nocacheflush;
|
||||
extern int vdev_open(vdev_t *);
|
||||
extern void vdev_open_children(vdev_t *);
|
||||
extern boolean_t vdev_uses_zvols(vdev_t *);
|
||||
extern int vdev_validate(vdev_t *);
|
||||
extern int vdev_validate(vdev_t *, boolean_t);
|
||||
extern void vdev_close(vdev_t *);
|
||||
extern int vdev_create(vdev_t *, uint64_t txg, boolean_t isreplace);
|
||||
extern void vdev_reopen(vdev_t *);
|
||||
@ -140,8 +142,8 @@ extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
|
||||
struct uberblock;
|
||||
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
|
||||
/*
 * NOTE(review): presumably the inverse of vdev_label_offset() -- maps a
 * device offset back to a label index for a device of psize bytes; confirm
 * against vdev_label.c.
 */
extern int vdev_label_number(uint64_t psize, uint64_t offset);
|
||||
extern nvlist_t *vdev_label_read_config(vdev_t *vd);
|
||||
extern void vdev_uberblock_load(zio_t *zio, vdev_t *vd, struct uberblock *ub);
|
||||
extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label);
|
||||
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
|
||||
|
||||
typedef enum {
|
||||
VDEV_LABEL_CREATE, /* create/add a new device */
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_VDEV_IMPL_H
|
||||
@ -55,7 +56,8 @@ typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||
/*
|
||||
* Virtual device operations
|
||||
*/
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *ashift);
|
||||
typedef int vdev_open_func_t(vdev_t *vd, uint64_t *size, uint64_t *max_size,
|
||||
uint64_t *ashift);
|
||||
typedef void vdev_close_func_t(vdev_t *vd);
|
||||
typedef uint64_t vdev_asize_func_t(vdev_t *vd, uint64_t psize);
|
||||
typedef int vdev_io_start_func_t(zio_t *zio);
|
||||
@ -118,6 +120,7 @@ struct vdev {
|
||||
uint64_t vdev_orig_guid; /* orig. guid prior to remove */
|
||||
uint64_t vdev_asize; /* allocatable device capacity */
|
||||
uint64_t vdev_min_asize; /* min acceptable asize */
|
||||
uint64_t vdev_max_asize; /* max acceptable asize */
|
||||
uint64_t vdev_ashift; /* block alignment shift */
|
||||
uint64_t vdev_state; /* see VDEV_STATE_* #defines */
|
||||
uint64_t vdev_prevstate; /* used when reopening a vdev */
|
||||
@ -199,7 +202,7 @@ struct vdev {
|
||||
* For DTrace to work in userland (libzpool) context, these fields must
|
||||
* remain at the end of the structure. DTrace will use the kernel's
|
||||
* CTF definition for 'struct vdev', and since the size of a kmutex_t is
|
||||
* larger in userland, the offsets for the rest fields would be
|
||||
* larger in userland, the offsets for the rest of the fields would be
|
||||
* incorrect.
|
||||
*/
|
||||
kmutex_t vdev_dtl_lock; /* vdev_dtl_{map,resilver} */
|
||||
@ -254,6 +257,7 @@ typedef struct vdev_label {
|
||||
#define VDEV_LABEL_START_SIZE (2 * sizeof (vdev_label_t) + VDEV_BOOT_SIZE)
|
||||
#define VDEV_LABEL_END_SIZE (2 * sizeof (vdev_label_t))
|
||||
#define VDEV_LABELS 4
|
||||
#define VDEV_BEST_LABEL VDEV_LABELS
|
||||
|
||||
#define VDEV_ALLOC_LOAD 0
|
||||
#define VDEV_ALLOC_ADD 1
|
||||
@ -261,6 +265,7 @@ typedef struct vdev_label {
|
||||
#define VDEV_ALLOC_L2CACHE 3
|
||||
#define VDEV_ALLOC_ROOTPOOL 4
|
||||
#define VDEV_ALLOC_SPLIT 5
|
||||
#define VDEV_ALLOC_ATTACH 6
|
||||
|
||||
/*
|
||||
* Allocate or free a vdev
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
@ -132,6 +133,8 @@ uint64_t zap_create_norm(objset_t *ds, int normflags, dmu_object_type_t ot,
|
||||
uint64_t zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
|
||||
dmu_object_type_t ot, int leaf_blockshift, int indirect_blockshift,
|
||||
dmu_object_type_t bonustype, int bonuslen, dmu_tx_t *tx);
|
||||
uint64_t zap_create_link(objset_t *os, dmu_object_type_t ot,
|
||||
uint64_t parent_obj, const char *name, dmu_tx_t *tx);
|
||||
|
||||
/*
|
||||
* Create a new zapobj with no attributes from the given (unallocated)
|
||||
@ -300,12 +303,6 @@ int zap_add_int_key(objset_t *os, uint64_t obj,
|
||||
int zap_lookup_int_key(objset_t *os, uint64_t obj,
|
||||
uint64_t key, uint64_t *valuep);
|
||||
|
||||
/*
|
||||
* They name is a stringified version of key; increment its value by
|
||||
* delta. Zero values will be zap_remove()-ed.
|
||||
*/
|
||||
int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
|
||||
dmu_tx_t *tx);
|
||||
int zap_increment(objset_t *os, uint64_t obj, const char *name, int64_t delta,
|
||||
dmu_tx_t *tx);
|
||||
|
||||
|
52
uts/common/fs/zfs/sys/zfeature.h
Normal file
52
uts/common/fs/zfs/sys/zfeature.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFEATURE_H
|
||||
#define _SYS_ZFEATURE_H
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include "zfeature_common.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
extern boolean_t feature_is_supported(objset_t *os, uint64_t obj,
|
||||
uint64_t desc_obj, nvlist_t *unsup_feat);
|
||||
|
||||
struct spa;
|
||||
extern void spa_feature_create_zap_objects(struct spa *, dmu_tx_t *);
|
||||
extern void spa_feature_enable(struct spa *, zfeature_info_t *, dmu_tx_t *);
|
||||
extern void spa_feature_incr(struct spa *, zfeature_info_t *, dmu_tx_t *);
|
||||
extern void spa_feature_decr(struct spa *, zfeature_info_t *, dmu_tx_t *);
|
||||
extern boolean_t spa_feature_is_enabled(struct spa *, zfeature_info_t *);
|
||||
extern boolean_t spa_feature_is_active(struct spa *, zfeature_info_t *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _SYS_ZFEATURE_H */
|
@ -218,7 +218,7 @@ int zfs_fastaccesschk_execute(struct znode *, cred_t *);
|
||||
extern int zfs_zaccess_rwx(struct znode *, mode_t, int, cred_t *);
|
||||
extern int zfs_zaccess_unix(struct znode *, mode_t, cred_t *);
|
||||
extern int zfs_acl_access(struct znode *, int, cred_t *);
|
||||
void zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
|
||||
int zfs_acl_chmod_setattr(struct znode *, zfs_acl_t **, uint64_t);
|
||||
int zfs_zaccess_delete(struct znode *, struct znode *, cred_t *);
|
||||
int zfs_zaccess_rename(struct znode *, struct znode *,
|
||||
struct znode *, struct znode *, cred_t *cr);
|
||||
|
@ -22,6 +22,9 @@
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
/*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZFS_CONTEXT_H
|
||||
#define _SYS_ZFS_CONTEXT_H
|
||||
@ -39,6 +42,7 @@ extern "C" {
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <sys/taskq.h>
|
||||
#include <sys/taskq_impl.h>
|
||||
#include <sys/buf.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
|
@ -57,6 +57,7 @@ struct zfsvfs {
|
||||
boolean_t z_fuid_dirty; /* need to sync fuid table ? */
|
||||
struct zfs_fuid_info *z_fuid_replay; /* fuid info for replay */
|
||||
zilog_t *z_log; /* intent log pointer */
|
||||
uint_t z_acl_mode; /* acl chmod/mode behavior */
|
||||
uint_t z_acl_inherit; /* acl inheritance behavior */
|
||||
zfs_case_t z_case; /* case-sense */
|
||||
boolean_t z_utf8; /* utf8-only */
|
||||
|
@ -22,6 +22,10 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*/
|
||||
/*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_H
|
||||
#define _ZIO_H
|
||||
@ -269,6 +273,14 @@ typedef struct zbookmark {
|
||||
#define ZB_ZIL_OBJECT (0ULL)
|
||||
#define ZB_ZIL_LEVEL (-2LL)
|
||||
|
||||
#define ZB_IS_ZERO(zb) \
|
||||
((zb)->zb_objset == 0 && (zb)->zb_object == 0 && \
|
||||
(zb)->zb_level == 0 && (zb)->zb_blkid == 0)
|
||||
#define ZB_IS_ROOT(zb) \
|
||||
((zb)->zb_object == ZB_ROOT_OBJECT && \
|
||||
(zb)->zb_level == ZB_ROOT_LEVEL && \
|
||||
(zb)->zb_blkid == ZB_ROOT_BLKID)
|
||||
|
||||
typedef struct zio_prop {
|
||||
enum zio_checksum zp_checksum;
|
||||
enum zio_compress zp_compress;
|
||||
@ -286,6 +298,7 @@ typedef void zio_cksum_finish_f(zio_cksum_report_t *rep,
|
||||
typedef void zio_cksum_free_f(void *cbdata, size_t size);
|
||||
|
||||
struct zio_bad_cksum; /* defined in zio_checksum.h */
|
||||
struct dnode_phys;
|
||||
|
||||
struct zio_cksum_report {
|
||||
struct zio_cksum_report *zcr_next;
|
||||
@ -417,6 +430,9 @@ struct zio {
|
||||
/* FMA state */
|
||||
zio_cksum_report_t *io_cksum_report;
|
||||
uint64_t io_ena;
|
||||
|
||||
/* Taskq dispatching state */
|
||||
taskq_ent_t io_tqent;
|
||||
};
|
||||
|
||||
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
|
||||
@ -552,6 +568,10 @@ extern void zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
|
||||
/* Called from spa_sync(), but primarily an injection handler */
|
||||
extern void spa_handle_ignored_writes(spa_t *spa);
|
||||
|
||||
/* zbookmark functions */
|
||||
boolean_t zbookmark_is_before(const struct dnode_phys *dnp,
|
||||
const zbookmark_t *zb1, const zbookmark_t *zb2);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Portions Copyright 2011 Martin Matuska
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -479,7 +480,7 @@ void
|
||||
txg_delay(dsl_pool_t *dp, uint64_t txg, int ticks)
|
||||
{
|
||||
tx_state_t *tx = &dp->dp_tx;
|
||||
int timeout = ddi_get_lbolt() + ticks;
|
||||
clock_t timeout = ddi_get_lbolt() + ticks;
|
||||
|
||||
/* don't delay if this txg could transition to quiescing immediately */
|
||||
if (tx->tx_open_txg > txg ||
|
||||
|
@ -21,6 +21,8 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -106,7 +108,7 @@ vdev_get_min_asize(vdev_t *vd)
|
||||
vdev_t *pvd = vd->vdev_parent;
|
||||
|
||||
/*
|
||||
* The our parent is NULL (inactive spare or cache) or is the root,
|
||||
* If our parent is NULL (inactive spare or cache) or is the root,
|
||||
* just return our own asize.
|
||||
*/
|
||||
if (pvd == NULL)
|
||||
@ -286,6 +288,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
if (spa->spa_root_vdev == NULL) {
|
||||
ASSERT(ops == &vdev_root_ops);
|
||||
spa->spa_root_vdev = vd;
|
||||
spa->spa_load_guid = spa_generate_guid(NULL);
|
||||
}
|
||||
|
||||
if (guid == 0 && ops != &vdev_hole_ops) {
|
||||
@ -485,7 +488,7 @@ vdev_alloc(spa_t *spa, vdev_t **vdp, nvlist_t *nv, vdev_t *parent, uint_t id,
|
||||
&vd->vdev_removing);
|
||||
}
|
||||
|
||||
if (parent && !parent->vdev_parent) {
|
||||
if (parent && !parent->vdev_parent && alloctype != VDEV_ALLOC_ATTACH) {
|
||||
ASSERT(alloctype == VDEV_ALLOC_LOAD ||
|
||||
alloctype == VDEV_ALLOC_ADD ||
|
||||
alloctype == VDEV_ALLOC_SPLIT ||
|
||||
@ -661,6 +664,8 @@ vdev_top_transfer(vdev_t *svd, vdev_t *tvd)
|
||||
svd->vdev_ms_shift = 0;
|
||||
svd->vdev_ms_count = 0;
|
||||
|
||||
if (tvd->vdev_mg)
|
||||
ASSERT3P(tvd->vdev_mg, ==, svd->vdev_mg);
|
||||
tvd->vdev_mg = svd->vdev_mg;
|
||||
tvd->vdev_ms = svd->vdev_ms;
|
||||
|
||||
@ -732,6 +737,7 @@ vdev_add_parent(vdev_t *cvd, vdev_ops_t *ops)
|
||||
|
||||
mvd->vdev_asize = cvd->vdev_asize;
|
||||
mvd->vdev_min_asize = cvd->vdev_min_asize;
|
||||
mvd->vdev_max_asize = cvd->vdev_max_asize;
|
||||
mvd->vdev_ashift = cvd->vdev_ashift;
|
||||
mvd->vdev_state = cvd->vdev_state;
|
||||
mvd->vdev_crtxg = cvd->vdev_crtxg;
|
||||
@ -1103,7 +1109,8 @@ vdev_open(vdev_t *vd)
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
int error;
|
||||
uint64_t osize = 0;
|
||||
uint64_t asize, psize;
|
||||
uint64_t max_osize = 0;
|
||||
uint64_t asize, max_asize, psize;
|
||||
uint64_t ashift = 0;
|
||||
|
||||
ASSERT(vd->vdev_open_thread == curthread ||
|
||||
@ -1134,7 +1141,7 @@ vdev_open(vdev_t *vd)
|
||||
return (ENXIO);
|
||||
}
|
||||
|
||||
error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
|
||||
error = vd->vdev_ops->vdev_op_open(vd, &osize, &max_osize, &ashift);
|
||||
|
||||
/*
|
||||
* Reset the vdev_reopening flag so that we actually close
|
||||
@ -1192,6 +1199,7 @@ vdev_open(vdev_t *vd)
|
||||
}
|
||||
|
||||
osize = P2ALIGN(osize, (uint64_t)sizeof (vdev_label_t));
|
||||
max_osize = P2ALIGN(max_osize, (uint64_t)sizeof (vdev_label_t));
|
||||
|
||||
if (vd->vdev_children == 0) {
|
||||
if (osize < SPA_MINDEVSIZE) {
|
||||
@ -1201,6 +1209,8 @@ vdev_open(vdev_t *vd)
|
||||
}
|
||||
psize = osize;
|
||||
asize = osize - (VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE);
|
||||
max_asize = max_osize - (VDEV_LABEL_START_SIZE +
|
||||
VDEV_LABEL_END_SIZE);
|
||||
} else {
|
||||
if (vd->vdev_parent != NULL && osize < SPA_MINDEVSIZE -
|
||||
(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) {
|
||||
@ -1210,6 +1220,7 @@ vdev_open(vdev_t *vd)
|
||||
}
|
||||
psize = 0;
|
||||
asize = osize;
|
||||
max_asize = max_osize;
|
||||
}
|
||||
|
||||
vd->vdev_psize = psize;
|
||||
@ -1229,16 +1240,22 @@ vdev_open(vdev_t *vd)
|
||||
* For testing purposes, a higher ashift can be requested.
|
||||
*/
|
||||
vd->vdev_asize = asize;
|
||||
vd->vdev_max_asize = max_asize;
|
||||
vd->vdev_ashift = MAX(ashift, vd->vdev_ashift);
|
||||
} else {
|
||||
/*
|
||||
* Make sure the alignment requirement hasn't increased.
|
||||
* Detect if the alignment requirement has increased.
|
||||
* We don't want to make the pool unavailable, just
|
||||
* issue a warning instead.
|
||||
*/
|
||||
if (ashift > vd->vdev_top->vdev_ashift) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_BAD_LABEL);
|
||||
return (EINVAL);
|
||||
if (ashift > vd->vdev_top->vdev_ashift &&
|
||||
vd->vdev_ops->vdev_op_leaf) {
|
||||
cmn_err(CE_WARN,
|
||||
"Disk, '%s', has a block alignment that is "
|
||||
"larger than the pool's alignment\n",
|
||||
vd->vdev_path);
|
||||
}
|
||||
vd->vdev_max_asize = max_asize;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1280,13 +1297,18 @@ vdev_open(vdev_t *vd)
|
||||
* contents. This needs to be done before vdev_load() so that we don't
|
||||
* inadvertently do repair I/Os to the wrong device.
|
||||
*
|
||||
* If 'strict' is false, ignore the spa guid check. This is necessary because
|
||||
* if the machine crashed during a re-guid the new guid might have been written
|
||||
* to all of the vdev labels, but not the cached config. The strict check
|
||||
* will be performed when the pool is opened again using the mos config.
|
||||
*
|
||||
* This function will only return failure if one of the vdevs indicates that it
|
||||
* has since been destroyed or exported. This is only possible if
|
||||
* /etc/zfs/zpool.cache was readonly at the time. Otherwise, the vdev state
|
||||
* will be updated but the function will return 0.
|
||||
*/
|
||||
int
|
||||
vdev_validate(vdev_t *vd)
|
||||
vdev_validate(vdev_t *vd, boolean_t strict)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
nvlist_t *label;
|
||||
@ -1294,7 +1316,7 @@ vdev_validate(vdev_t *vd)
|
||||
uint64_t state;
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
if (vdev_validate(vd->vdev_child[c]) != 0)
|
||||
if (vdev_validate(vd->vdev_child[c], strict) != 0)
|
||||
return (EBADF);
|
||||
|
||||
/*
|
||||
@ -1306,7 +1328,8 @@ vdev_validate(vdev_t *vd)
|
||||
uint64_t aux_guid = 0;
|
||||
nvlist_t *nvl;
|
||||
|
||||
if ((label = vdev_label_read_config(vd)) == NULL) {
|
||||
if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) ==
|
||||
NULL) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_BAD_LABEL);
|
||||
return (0);
|
||||
@ -1324,8 +1347,9 @@ vdev_validate(vdev_t *vd)
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_GUID,
|
||||
&guid) != 0 || guid != spa_guid(spa)) {
|
||||
if (strict && (nvlist_lookup_uint64(label,
|
||||
ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
|
||||
guid != spa_guid(spa))) {
|
||||
vdev_set_state(vd, B_FALSE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
nvlist_free(label);
|
||||
@ -1487,7 +1511,7 @@ vdev_reopen(vdev_t *vd)
|
||||
!l2arc_vdev_present(vd))
|
||||
l2arc_add_vdev(spa, vd);
|
||||
} else {
|
||||
(void) vdev_validate(vd);
|
||||
(void) vdev_validate(vd, B_TRUE);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1946,14 +1970,14 @@ vdev_validate_aux(vdev_t *vd)
|
||||
if (!vdev_readable(vd))
|
||||
return (0);
|
||||
|
||||
if ((label = vdev_label_read_config(vd)) == NULL) {
|
||||
if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) {
|
||||
vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
|
||||
VDEV_AUX_CORRUPT_DATA);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (nvlist_lookup_uint64(label, ZPOOL_CONFIG_VERSION, &version) != 0 ||
|
||||
version > SPA_VERSION ||
|
||||
!SPA_VERSION_IS_SUPPORTED(version) ||
|
||||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_GUID, &guid) != 0 ||
|
||||
guid != vd->vdev_guid ||
|
||||
nvlist_lookup_uint64(label, ZPOOL_CONFIG_POOL_STATE, &state) != 0) {
|
||||
@ -2456,6 +2480,7 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
|
||||
vs->vs_rsize = vdev_get_min_asize(vd);
|
||||
if (vd->vdev_ops->vdev_op_leaf)
|
||||
vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
|
||||
vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
|
||||
mutex_exit(&vd->vdev_stat_lock);
|
||||
|
||||
/*
|
||||
|
@ -71,9 +71,16 @@
|
||||
* 1<<zfs_vdev_cache_bshift byte reads by the vdev_cache (aka software
|
||||
* track buffer). At most zfs_vdev_cache_size bytes will be kept in each
|
||||
* vdev's vdev_cache.
|
||||
*
|
||||
* TODO: Note that with the current ZFS code, it turns out that the
|
||||
* vdev cache is not helpful, and in some cases actually harmful. It
|
||||
* is better if we disable this. Once some time has passed, we should
|
||||
* actually remove this to simplify the code. For now we just disable
|
||||
* it by setting the zfs_vdev_cache_size to zero. Note that Solaris 11
|
||||
* has made these same changes.
|
||||
*/
|
||||
int zfs_vdev_cache_max = 1<<14; /* 16KB */
|
||||
int zfs_vdev_cache_size = 10ULL << 20; /* 10MB */
|
||||
int zfs_vdev_cache_size = 0;
|
||||
int zfs_vdev_cache_bshift = 16;
|
||||
|
||||
#define VCBS (1 << zfs_vdev_cache_bshift) /* 64KB */
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -30,6 +31,7 @@
|
||||
#include <sys/fs/zfs.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/sunldi.h>
|
||||
#include <sys/efi_partition.h>
|
||||
#include <sys/fm/fs/zfs.h>
|
||||
|
||||
/*
|
||||
@ -102,8 +104,39 @@ vdev_disk_rele(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz)
|
||||
{
|
||||
ASSERT(vd->vdev_wholedisk);
|
||||
|
||||
vdev_disk_t *dvd = vd->vdev_tsd;
|
||||
dk_efi_t dk_ioc;
|
||||
efi_gpt_t *efi;
|
||||
uint64_t avail_space = 0;
|
||||
int efisize = EFI_LABEL_SIZE * 2;
|
||||
|
||||
dk_ioc.dki_data = kmem_alloc(efisize, KM_SLEEP);
|
||||
dk_ioc.dki_lba = 1;
|
||||
dk_ioc.dki_length = efisize;
|
||||
dk_ioc.dki_data_64 = (uint64_t)(uintptr_t)dk_ioc.dki_data;
|
||||
efi = dk_ioc.dki_data;
|
||||
|
||||
if (ldi_ioctl(dvd->vd_lh, DKIOCGETEFI, (intptr_t)&dk_ioc,
|
||||
FKIOCTL, kcred, NULL) == 0) {
|
||||
uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
|
||||
|
||||
zfs_dbgmsg("vdev %s, capacity %llu, altern lba %llu",
|
||||
vd->vdev_path, capacity, efi_altern_lba);
|
||||
if (capacity > efi_altern_lba)
|
||||
avail_space = (capacity - efi_altern_lba) * blksz;
|
||||
}
|
||||
kmem_free(dk_ioc.dki_data, efisize);
|
||||
return (avail_space);
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
|
||||
vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_disk_t *dvd;
|
||||
@ -273,16 +306,6 @@ skip_open:
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
/*
|
||||
* If we own the whole disk, try to enable disk write caching.
|
||||
* We ignore errors because it's OK if we can't do it.
|
||||
*/
|
||||
if (vd->vdev_wholedisk == 1) {
|
||||
int wce = 1;
|
||||
(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
|
||||
FKIOCTL, kcred, NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
* Determine the device's minimum transfer size.
|
||||
* If the ioctl isn't supported, assume DEV_BSIZE.
|
||||
@ -293,6 +316,25 @@ skip_open:
|
||||
|
||||
*ashift = highbit(MAX(dkmext.dki_pbsize, SPA_MINBLOCKSIZE)) - 1;
|
||||
|
||||
if (vd->vdev_wholedisk == 1) {
|
||||
uint64_t capacity = dkmext.dki_capacity - 1;
|
||||
uint64_t blksz = dkmext.dki_lbsize;
|
||||
int wce = 1;
|
||||
|
||||
/*
|
||||
* If we own the whole disk, try to enable disk write caching.
|
||||
* We ignore errors because it's OK if we can't do it.
|
||||
*/
|
||||
(void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
|
||||
FKIOCTL, kcred, NULL);
|
||||
|
||||
*max_psize = *psize + vdev_disk_get_space(vd, capacity, blksz);
|
||||
zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
|
||||
"max_psize %llu", vd->vdev_path, *psize, *max_psize);
|
||||
} else {
|
||||
*max_psize = *psize;
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the nowritecache bit, so that on a vdev_reopen() we will
|
||||
* try again.
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -47,7 +48,8 @@ vdev_file_rele(vdev_t *vd)
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
|
||||
vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
vdev_file_t *vf;
|
||||
vnode_t *vp;
|
||||
@ -112,7 +114,7 @@ skip_open:
|
||||
return (error);
|
||||
}
|
||||
|
||||
*psize = vattr.va_size;
|
||||
*max_psize = *psize = vattr.va_size;
|
||||
*ashift = SPA_MINBLOCKSHIFT;
|
||||
|
||||
return (0);
|
||||
|
@ -18,8 +18,10 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -121,6 +123,8 @@
|
||||
* txg Transaction group in which this label was written
|
||||
* pool_guid Unique identifier for this pool
|
||||
* vdev_tree An nvlist describing vdev tree.
|
||||
* features_for_read
|
||||
* An nvlist of the features necessary for reading the MOS.
|
||||
*
|
||||
* Each leaf device label also contains the following:
|
||||
*
|
||||
@ -428,8 +432,13 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
|
||||
kmem_free(array, rvd->vdev_children * sizeof (uint64_t));
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the configuration from the label of the given vdev. If 'label' is
|
||||
* VDEV_BEST_LABEL, each label of the vdev will be read until a valid
|
||||
* configuration is found; otherwise, only the specified label will be read.
|
||||
*/
|
||||
nvlist_t *
|
||||
vdev_label_read_config(vdev_t *vd)
|
||||
vdev_label_read_config(vdev_t *vd, int label)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
nvlist_t *config = NULL;
|
||||
@ -447,6 +456,8 @@ vdev_label_read_config(vdev_t *vd)
|
||||
|
||||
retry:
|
||||
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||
if (label >= 0 && label < VDEV_LABELS && label != l)
|
||||
continue;
|
||||
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
|
||||
@ -496,7 +507,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
|
||||
/*
|
||||
* Read the label, if any, and perform some basic sanity checks.
|
||||
*/
|
||||
if ((label = vdev_label_read_config(vd)) == NULL)
|
||||
if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
(void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,
|
||||
@ -833,7 +844,7 @@ retry:
|
||||
* come back up, we fail to see the uberblock for txg + 1 because, say,
|
||||
* it was on a mirrored device and the replica to which we wrote txg + 1
|
||||
* is now offline. If we then make some changes and sync txg + 1, and then
|
||||
* the missing replica comes back, then for a new seconds we'll have two
|
||||
* the missing replica comes back, then for a few seconds we'll have two
|
||||
* conflicting uberblocks on disk with the same txg. The solution is simple:
|
||||
* among uberblocks with equal txg, choose the one with the latest timestamp.
|
||||
*/
|
||||
@ -853,46 +864,50 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
|
||||
return (0);
|
||||
}
|
||||
|
||||
struct ubl_cbdata {
|
||||
uberblock_t *ubl_ubbest; /* Best uberblock */
|
||||
vdev_t *ubl_vd; /* vdev associated with the above */
|
||||
int ubl_label; /* Label associated with the above */
|
||||
};
|
||||
|
||||
static void
|
||||
vdev_uberblock_load_done(zio_t *zio)
|
||||
{
|
||||
vdev_t *vd = zio->io_vd;
|
||||
spa_t *spa = zio->io_spa;
|
||||
zio_t *rio = zio->io_private;
|
||||
uberblock_t *ub = zio->io_data;
|
||||
uberblock_t *ubbest = rio->io_private;
|
||||
struct ubl_cbdata *cbp = rio->io_private;
|
||||
|
||||
ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(zio->io_vd));
|
||||
ASSERT3U(zio->io_size, ==, VDEV_UBERBLOCK_SIZE(vd));
|
||||
|
||||
if (zio->io_error == 0 && uberblock_verify(ub) == 0) {
|
||||
mutex_enter(&rio->io_lock);
|
||||
if (ub->ub_txg <= spa->spa_load_max_txg &&
|
||||
vdev_uberblock_compare(ub, ubbest) > 0)
|
||||
*ubbest = *ub;
|
||||
vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
|
||||
/*
|
||||
* Keep track of the vdev and label in which this
|
||||
* uberblock was found. We will use this information
|
||||
* later to obtain the config nvlist associated with
|
||||
* this uberblock.
|
||||
*/
|
||||
*cbp->ubl_ubbest = *ub;
|
||||
cbp->ubl_vd = vd;
|
||||
cbp->ubl_label = vdev_label_number(vd->vdev_psize,
|
||||
zio->io_offset);
|
||||
}
|
||||
mutex_exit(&rio->io_lock);
|
||||
}
|
||||
|
||||
zio_buf_free(zio->io_data, zio->io_size);
|
||||
}
|
||||
|
||||
void
|
||||
vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
|
||||
static void
|
||||
vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
|
||||
struct ubl_cbdata *cbp)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
|
||||
|
||||
if (vd == rvd) {
|
||||
ASSERT(zio == NULL);
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
zio = zio_root(spa, NULL, ubbest, flags);
|
||||
bzero(ubbest, sizeof (uberblock_t));
|
||||
}
|
||||
|
||||
ASSERT(zio != NULL);
|
||||
|
||||
for (int c = 0; c < vd->vdev_children; c++)
|
||||
vdev_uberblock_load(zio, vd->vdev_child[c], ubbest);
|
||||
vdev_uberblock_load_impl(zio, vd->vdev_child[c], flags, cbp);
|
||||
|
||||
if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
|
||||
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||
@ -905,11 +920,45 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (vd == rvd) {
|
||||
(void) zio_wait(zio);
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
/*
|
||||
* Reads the 'best' uberblock from disk along with its associated
|
||||
* configuration. First, we read the uberblock array of each label of each
|
||||
* vdev, keeping track of the uberblock with the highest txg in each array.
|
||||
* Then, we read the configuration from the same label as the best uberblock.
|
||||
*/
|
||||
void
|
||||
vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
|
||||
{
|
||||
int i;
|
||||
zio_t *zio;
|
||||
spa_t *spa = rvd->vdev_spa;
|
||||
struct ubl_cbdata cb;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
|
||||
|
||||
ASSERT(ub);
|
||||
ASSERT(config);
|
||||
|
||||
bzero(ub, sizeof (uberblock_t));
|
||||
*config = NULL;
|
||||
|
||||
cb.ubl_ubbest = ub;
|
||||
cb.ubl_vd = NULL;
|
||||
|
||||
spa_config_enter(spa, SCL_ALL, FTAG, RW_WRITER);
|
||||
zio = zio_root(spa, NULL, &cb, flags);
|
||||
vdev_uberblock_load_impl(zio, rvd, flags, &cb);
|
||||
(void) zio_wait(zio);
|
||||
if (cb.ubl_vd != NULL) {
|
||||
for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) {
|
||||
*config = vdev_label_read_config(cb.ubl_vd, i);
|
||||
if (*config != NULL)
|
||||
break;
|
||||
}
|
||||
}
|
||||
spa_config_exit(spa, SCL_ALL, FTAG);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -23,6 +23,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
@ -127,7 +131,8 @@ vdev_mirror_map_alloc(zio_t *zio)
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
int numerrors = 0;
|
||||
int lasterror = 0;
|
||||
@ -149,6 +154,7 @@ vdev_mirror_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
}
|
||||
|
||||
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
|
||||
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
|
||||
*ashift = MAX(*ashift, cvd->vdev_ashift);
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The 'missing' vdev is a special vdev type used only during import. It
|
||||
* signifies a placeholder in the root vdev for some vdev that we know is
|
||||
@ -40,7 +44,8 @@
|
||||
|
||||
/* ARGSUSED */
|
||||
static int
|
||||
vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
|
||||
vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
/*
|
||||
* Really this should just fail. But then the root vdev will be in the
|
||||
@ -49,6 +54,7 @@ vdev_missing_open(vdev_t *vd, uint64_t *psize, uint64_t *ashift)
|
||||
* will fail the GUID sum check before ever trying to open the pool.
|
||||
*/
|
||||
*psize = 0;
|
||||
*max_psize = 0;
|
||||
*ashift = 0;
|
||||
return (0);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -1441,7 +1442,8 @@ vdev_raidz_reconstruct(raidz_map_t *rm, int *t, int nt)
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
vdev_t *cvd;
|
||||
uint64_t nparity = vd->vdev_nparity;
|
||||
@ -1469,10 +1471,12 @@ vdev_raidz_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
}
|
||||
|
||||
*asize = MIN(*asize - 1, cvd->vdev_asize - 1) + 1;
|
||||
*max_asize = MIN(*max_asize - 1, cvd->vdev_max_asize - 1) + 1;
|
||||
*ashift = MAX(*ashift, cvd->vdev_ashift);
|
||||
}
|
||||
|
||||
*asize *= vd->vdev_children;
|
||||
*max_asize *= vd->vdev_children;
|
||||
|
||||
if (numerrors > nparity) {
|
||||
vd->vdev_stat.vs_aux = VDEV_AUX_NO_REPLICAS;
|
||||
|
@ -23,6 +23,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/vdev_impl.h>
|
||||
@ -50,7 +54,8 @@ too_many_errors(vdev_t *vd, int numerrors)
|
||||
}
|
||||
|
||||
static int
|
||||
vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *max_asize,
|
||||
uint64_t *ashift)
|
||||
{
|
||||
int lasterror = 0;
|
||||
int numerrors = 0;
|
||||
@ -77,6 +82,7 @@ vdev_root_open(vdev_t *vd, uint64_t *asize, uint64_t *ashift)
|
||||
}
|
||||
|
||||
*asize = 0;
|
||||
*max_asize = 0;
|
||||
*ashift = 0;
|
||||
|
||||
return (0);
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
@ -946,6 +947,19 @@ fzap_prefetch(zap_name_t *zn)
|
||||
* Helper functions for consumers.
|
||||
*/
|
||||
|
||||
uint64_t
|
||||
zap_create_link(objset_t *os, dmu_object_type_t ot, uint64_t parent_obj,
|
||||
const char *name, dmu_tx_t *tx)
|
||||
{
|
||||
uint64_t new_obj;
|
||||
|
||||
VERIFY((new_obj = zap_create(os, ot, DMU_OT_NONE, 0, tx)) > 0);
|
||||
VERIFY(zap_add(os, parent_obj, name, sizeof (uint64_t), 1, &new_obj,
|
||||
tx) == 0);
|
||||
|
||||
return (new_obj);
|
||||
}
|
||||
|
||||
int
|
||||
zap_value_search(objset_t *os, uint64_t zapobj, uint64_t value, uint64_t mask,
|
||||
char *name)
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zio.h>
|
||||
@ -460,7 +461,7 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
dmu_object_info_from_db(db, &doi);
|
||||
ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
|
||||
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -584,7 +585,7 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
dmu_object_info_from_db(db, &doi);
|
||||
ASSERT(dmu_ot[doi.doi_type].ot_byteswap == zap_byteswap);
|
||||
ASSERT3U(DMU_OT_BYTESWAP(doi.doi_type), ==, DMU_BSWAP_ZAP);
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -1403,7 +1404,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
}
|
||||
|
||||
/*
|
||||
* We lock the zap with adding == FALSE. Because, if we pass
|
||||
* We lock the zap with adding == FALSE. Because, if we pass
|
||||
* the actual value of add, it could trigger a mzap_upgrade().
|
||||
* At present we are just evaluating the possibility of this operation
|
||||
* and hence we do not want to trigger an upgrade.
|
||||
|
414
uts/common/fs/zfs/zfeature.c
Normal file
414
uts/common/fs/zfs/zfeature.c
Normal file
@ -0,0 +1,414 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/nvpair.h>
|
||||
#include <sys/zap.h>
|
||||
#include <sys/dmu_tx.h>
|
||||
#include "zfeature_common.h"
|
||||
#include <sys/spa_impl.h>
|
||||
|
||||
/*
|
||||
* ZFS Feature Flags
|
||||
* -----------------
|
||||
*
|
||||
* ZFS feature flags are used to provide fine-grained versioning to the ZFS
|
||||
* on-disk format. Once enabled on a pool feature flags replace the old
|
||||
* spa_version() number.
|
||||
*
|
||||
* Each new on-disk format change will be given a uniquely identifying string
|
||||
* guid rather than a version number. This avoids the problem of different
|
||||
* organizations creating new on-disk formats with the same version number. To
|
||||
* keep feature guids unique they should consist of the reverse dns name of the
|
||||
* organization which implemented the feature and a short name for the feature,
|
||||
* separated by a colon (e.g. com.delphix:async_destroy).
|
||||
*
|
||||
* Reference Counts
|
||||
* ----------------
|
||||
*
|
||||
* Within each pool features can be in one of three states: disabled, enabled,
|
||||
* or active. These states are differentiated by a reference count stored on
|
||||
* disk for each feature:
|
||||
*
|
||||
* 1) If there is no reference count stored on disk the feature is disabled.
|
||||
* 2) If the reference count is 0 a system administrator has enabled the
|
||||
* feature, but the feature has not been used yet, so no on-disk
|
||||
* format changes have been made.
|
||||
* 3) If the reference count is greater than 0 the feature is active.
|
||||
* The format changes required by the feature are currently on disk.
|
||||
* Note that if the feature's format changes are reversed the feature
|
||||
* may choose to set its reference count back to 0.
|
||||
*
|
||||
* Feature flags makes no differentiation between non-zero reference counts
|
||||
* for an active feature (e.g. a reference count of 1 means the same thing as a
|
||||
* reference count of 27834721), but feature implementations may choose to use
|
||||
* the reference count to store meaningful information. For example, a new RAID
|
||||
* implementation might set the reference count to the number of vdevs using
|
||||
* it. If all those disks are removed from the pool the feature goes back to
|
||||
* having a reference count of 0.
|
||||
*
|
||||
* It is the responsibility of the individual features to maintain a non-zero
|
||||
* reference count as long as the feature's format changes are present on disk.
|
||||
*
|
||||
* Dependencies
|
||||
* ------------
|
||||
*
|
||||
* Each feature may depend on other features. The only effect of this
|
||||
* relationship is that when a feature is enabled all of its dependencies are
|
||||
* automatically enabled as well. Any future work to support disabling of
|
||||
* features would need to ensure that features cannot be disabled if other
|
||||
* enabled features depend on them.
|
||||
*
|
||||
* On-disk Format
|
||||
* --------------
|
||||
*
|
||||
* When feature flags are enabled spa_version() is set to SPA_VERSION_FEATURES
|
||||
* (5000). In order for this to work the pool is automatically upgraded to
|
||||
* SPA_VERSION_BEFORE_FEATURES (28) first, so all pre-feature flags on disk
|
||||
* format changes will be in use.
|
||||
*
|
||||
* Information about features is stored in 3 ZAP objects in the pool's MOS.
|
||||
* These objects are linked to by the following names in the pool directory
|
||||
* object:
|
||||
*
|
||||
* 1) features_for_read: feature guid -> reference count
|
||||
* Features needed to open the pool for reading.
|
||||
* 2) features_for_write: feature guid -> reference count
|
||||
* Features needed to open the pool for writing.
|
||||
* 3) feature_descriptions: feature guid -> descriptive string
|
||||
* A human readable string.
|
||||
*
|
||||
* All enabled features appear in either features_for_read or
|
||||
* features_for_write, but not both.
|
||||
*
|
||||
* To open a pool in read-only mode only the features listed in
|
||||
* features_for_read need to be supported.
|
||||
*
|
||||
* To open the pool in read-write mode features in both features_for_read and
|
||||
* features_for_write need to be supported.
|
||||
*
|
||||
* Some features may be required to read the ZAP objects containing feature
|
||||
* information. To allow software to check for compatibility with these features
|
||||
* before the pool is opened their names must be stored in the label in a
|
||||
* new "features_for_read" entry (note that features that are only required
|
||||
* to write to a pool never need to be stored in the label since the
|
||||
* features_for_write ZAP object can be read before the pool is written to).
|
||||
* To save space in the label features must be explicitly marked as needing to
|
||||
* be written to the label. Also, reference counts are not stored in the label,
|
||||
* instead any feature whose reference count drops to 0 is removed from the
|
||||
* label.
|
||||
*
|
||||
* Adding New Features
|
||||
* -------------------
|
||||
*
|
||||
* Features must be registered in the zpool_feature_init() function in
|
||||
* zfeature_common.c using the zfeature_register() function. This function
|
||||
* has arguments to specify if the feature should be stored in the
|
||||
* features_for_read or features_for_write ZAP object and if it needs to be
|
||||
* written to the label when active.
|
||||
*
|
||||
* Once a feature is registered it will appear as a "feature@<feature name>"
|
||||
* property which can be set by an administrator. Feature implementors should
|
||||
* use the spa_feature_is_enabled() and spa_feature_is_active() functions to
|
||||
* query the state of a feature and the spa_feature_incr() and
|
||||
* spa_feature_decr() functions to change an enabled feature's reference count.
|
||||
* Reference counts may only be updated in the syncing context.
|
||||
*
|
||||
* Features may not perform enable-time initialization. Instead, any such
|
||||
* initialization should occur when the feature is first used. This design
|
||||
* enforces that on-disk changes be made only when features are used. Code
|
||||
* should only check if a feature is enabled using spa_feature_is_enabled(),
|
||||
* not by relying on any feature specific metadata existing. If a feature is
|
||||
* enabled, but the feature's metadata is not on disk yet then it should be
|
||||
* created as needed.
|
||||
*
|
||||
* As an example, consider the com.delphix:async_destroy feature. This feature
|
||||
* relies on the existence of a bptree in the MOS that stores blocks for
|
||||
* asynchronous freeing. This bptree is not created when async_destroy is
|
||||
* enabled. Instead, when a dataset is destroyed spa_feature_is_enabled() is
|
||||
* called to check if async_destroy is enabled. If it is and the bptree object
|
||||
* does not exist yet, the bptree object is created as part of the dataset
|
||||
* destroy and async_destroy's reference count is incremented to indicate it
|
||||
* has made an on-disk format change. Later, after the destroyed dataset's
|
||||
* blocks have all been asynchronously freed there is no longer any use for the
|
||||
* bptree object, so it is destroyed and async_destroy's reference count is
|
||||
* decremented back to 0 to indicate that it has undone its on-disk format
|
||||
* changes.
|
||||
*/
|
||||
|
||||
typedef enum {
|
||||
FEATURE_ACTION_ENABLE,
|
||||
FEATURE_ACTION_INCR,
|
||||
FEATURE_ACTION_DECR,
|
||||
} feature_action_t;
|
||||
|
||||
/*
|
||||
* Checks that the features active in the specified object are supported by
|
||||
* this software. Adds each unsupported feature (name -> description) to
|
||||
* the supplied nvlist.
|
||||
*/
|
||||
boolean_t
|
||||
feature_is_supported(objset_t *os, uint64_t obj, uint64_t desc_obj,
|
||||
nvlist_t *unsup_feat)
|
||||
{
|
||||
boolean_t supported;
|
||||
zap_cursor_t zc;
|
||||
zap_attribute_t za;
|
||||
|
||||
supported = B_TRUE;
|
||||
for (zap_cursor_init(&zc, os, obj);
|
||||
zap_cursor_retrieve(&zc, &za) == 0;
|
||||
zap_cursor_advance(&zc)) {
|
||||
ASSERT(za.za_integer_length == sizeof (uint64_t) &&
|
||||
za.za_num_integers == 1);
|
||||
|
||||
if (za.za_first_integer != 0 &&
|
||||
!zfeature_is_supported(za.za_name)) {
|
||||
supported = B_FALSE;
|
||||
|
||||
if (unsup_feat != NULL) {
|
||||
char *desc = "";
|
||||
char buf[MAXPATHLEN];
|
||||
|
||||
if (zap_lookup(os, desc_obj, za.za_name,
|
||||
1, sizeof (buf), buf) == 0)
|
||||
desc = buf;
|
||||
|
||||
VERIFY(nvlist_add_string(unsup_feat, za.za_name,
|
||||
desc) == 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
zap_cursor_fini(&zc);
|
||||
|
||||
return (supported);
|
||||
}
|
||||
|
||||
static int
|
||||
feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj,
|
||||
zfeature_info_t *feature, uint64_t *res)
|
||||
{
|
||||
int err;
|
||||
uint64_t refcount;
|
||||
uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
|
||||
|
||||
ASSERT(0 != zapobj);
|
||||
|
||||
err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
|
||||
&refcount);
|
||||
if (err != 0) {
|
||||
if (err == ENOENT)
|
||||
return (ENOTSUP);
|
||||
else
|
||||
return (err);
|
||||
}
|
||||
*res = refcount;
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
feature_do_action(objset_t *os, uint64_t read_obj, uint64_t write_obj,
|
||||
uint64_t desc_obj, zfeature_info_t *feature, feature_action_t action,
|
||||
dmu_tx_t *tx)
|
||||
{
|
||||
int error;
|
||||
uint64_t refcount;
|
||||
uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
|
||||
|
||||
ASSERT(0 != zapobj);
|
||||
ASSERT(zfeature_is_valid_guid(feature->fi_guid));
|
||||
|
||||
error = zap_lookup(os, zapobj, feature->fi_guid,
|
||||
sizeof (uint64_t), 1, &refcount);
|
||||
|
||||
/*
|
||||
* If we can't ascertain the status of the specified feature, an I/O
|
||||
* error occurred.
|
||||
*/
|
||||
if (error != 0 && error != ENOENT)
|
||||
return (error);
|
||||
|
||||
switch (action) {
|
||||
case FEATURE_ACTION_ENABLE:
|
||||
/*
|
||||
* If the feature is already enabled, ignore the request.
|
||||
*/
|
||||
if (error == 0)
|
||||
return (0);
|
||||
refcount = 0;
|
||||
break;
|
||||
case FEATURE_ACTION_INCR:
|
||||
if (error == ENOENT)
|
||||
return (ENOTSUP);
|
||||
if (refcount == UINT64_MAX)
|
||||
return (EOVERFLOW);
|
||||
refcount++;
|
||||
break;
|
||||
case FEATURE_ACTION_DECR:
|
||||
if (error == ENOENT)
|
||||
return (ENOTSUP);
|
||||
if (refcount == 0)
|
||||
return (EOVERFLOW);
|
||||
refcount--;
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (action == FEATURE_ACTION_ENABLE) {
|
||||
int i;
|
||||
|
||||
for (i = 0; feature->fi_depends[i] != NULL; i++) {
|
||||
zfeature_info_t *dep = feature->fi_depends[i];
|
||||
|
||||
error = feature_do_action(os, read_obj, write_obj,
|
||||
desc_obj, dep, FEATURE_ACTION_ENABLE, tx);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
error = zap_update(os, zapobj, feature->fi_guid,
|
||||
sizeof (uint64_t), 1, &refcount, tx);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
if (action == FEATURE_ACTION_ENABLE) {
|
||||
error = zap_update(os, desc_obj,
|
||||
feature->fi_guid, 1, strlen(feature->fi_desc) + 1,
|
||||
feature->fi_desc, tx);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (action == FEATURE_ACTION_INCR && refcount == 1 && feature->fi_mos) {
|
||||
spa_activate_mos_feature(dmu_objset_spa(os), feature->fi_guid);
|
||||
}
|
||||
|
||||
if (action == FEATURE_ACTION_DECR && refcount == 0) {
|
||||
spa_deactivate_mos_feature(dmu_objset_spa(os),
|
||||
feature->fi_guid);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spa_feature_create_zap_objects(spa_t *spa, dmu_tx_t *tx)
|
||||
{
|
||||
/*
|
||||
* We create feature flags ZAP objects in two instances: during pool
|
||||
* creation and during pool upgrade.
|
||||
*/
|
||||
ASSERT(dsl_pool_sync_context(spa_get_dsl(spa)) || (!spa->spa_sync_on &&
|
||||
tx->tx_txg == TXG_INITIAL));
|
||||
|
||||
spa->spa_feat_for_read_obj = zap_create_link(spa->spa_meta_objset,
|
||||
DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FEATURES_FOR_READ, tx);
|
||||
spa->spa_feat_for_write_obj = zap_create_link(spa->spa_meta_objset,
|
||||
DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FEATURES_FOR_WRITE, tx);
|
||||
spa->spa_feat_desc_obj = zap_create_link(spa->spa_meta_objset,
|
||||
DMU_OTN_ZAP_METADATA, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FEATURE_DESCRIPTIONS, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Enable any required dependencies, then enable the requested feature.
|
||||
*/
|
||||
void
|
||||
spa_feature_enable(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
|
||||
{
|
||||
/* The pool must already be at a version that supports feature flags. */
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
|
||||
/*
 * This function has no way to report failure: any nonzero result from
 * feature_do_action() trips the VERIFY3U below.
 */
VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
|
||||
spa->spa_feat_desc_obj, feature, FEATURE_ACTION_ENABLE, tx));
|
||||
}
|
||||
|
||||
/*
 * Increment the refcount of the specified feature. This function must be
 * called from syncing context. It returns nothing: the underlying
 * feature_do_action() call fails with ENOTSUP if the feature has not yet
 * been enabled, or with EOVERFLOW if the refcount cannot be incremented,
 * and any such failure trips the VERIFY3U() in this function.
 */
|
||||
void
|
||||
spa_feature_incr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
|
||||
{
|
||||
/* The pool must already be at a version that supports feature flags. */
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
|
||||
/*
 * This function has no way to report failure: any nonzero result from
 * feature_do_action() trips the VERIFY3U below.
 */
VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
|
||||
spa->spa_feat_desc_obj, feature, FEATURE_ACTION_INCR, tx));
|
||||
}
|
||||
|
||||
/*
 * Decrement the refcount of the specified feature. This function must be
 * called from syncing context. It returns nothing: the underlying
 * feature_do_action() call fails with ENOTSUP if the feature has not yet
 * been enabled, or with EOVERFLOW if the refcount is already 0, and any
 * such failure trips the VERIFY3U() in this function.
 */
|
||||
void
|
||||
spa_feature_decr(spa_t *spa, zfeature_info_t *feature, dmu_tx_t *tx)
|
||||
{
|
||||
/* The pool must already be at a version that supports feature flags. */
ASSERT3U(spa_version(spa), >=, SPA_VERSION_FEATURES);
|
||||
/*
 * This function has no way to report failure: any nonzero result from
 * feature_do_action() trips the VERIFY3U below.
 */
VERIFY3U(0, ==, feature_do_action(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
|
||||
spa->spa_feat_desc_obj, feature, FEATURE_ACTION_DECR, tx));
|
||||
}
|
||||
|
||||
boolean_t
|
||||
spa_feature_is_enabled(spa_t *spa, zfeature_info_t *feature)
|
||||
{
|
||||
int err;
|
||||
uint64_t refcount;
|
||||
|
||||
if (spa_version(spa) < SPA_VERSION_FEATURES)
|
||||
return (B_FALSE);
|
||||
|
||||
err = feature_get_refcount(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
|
||||
feature, &refcount);
|
||||
ASSERT(err == 0 || err == ENOTSUP);
|
||||
return (err == 0);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
spa_feature_is_active(spa_t *spa, zfeature_info_t *feature)
|
||||
{
|
||||
int err;
|
||||
uint64_t refcount;
|
||||
|
||||
if (spa_version(spa) < SPA_VERSION_FEATURES)
|
||||
return (B_FALSE);
|
||||
|
||||
err = feature_get_refcount(spa->spa_meta_objset,
|
||||
spa->spa_feat_for_read_obj, spa->spa_feat_for_write_obj,
|
||||
feature, &refcount);
|
||||
ASSERT(err == 0 || err == ENOTSUP);
|
||||
return (err == 0 && refcount > 0);
|
||||
}
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@ -1330,75 +1331,8 @@ zfs_aclset_common(znode_t *zp, zfs_acl_t *aclp, cred_t *cr, dmu_tx_t *tx)
|
||||
return (sa_bulk_update(zp->z_sa_hdl, bulk, count, tx));
|
||||
}
|
||||
|
||||
/*
|
||||
* Update access mask for prepended ACE
|
||||
*
|
||||
* This applies the "groupmask" value for aclmode property.
|
||||
*/
|
||||
static void
|
||||
zfs_acl_prepend_fixup(zfs_acl_t *aclp, void *acep, void *origacep,
|
||||
mode_t mode, uint64_t owner)
|
||||
{
|
||||
int rmask, wmask, xmask;
|
||||
int user_ace;
|
||||
uint16_t aceflags;
|
||||
uint32_t origmask, acepmask;
|
||||
uint64_t fuid;
|
||||
|
||||
aceflags = aclp->z_ops.ace_flags_get(acep);
|
||||
fuid = aclp->z_ops.ace_who_get(acep);
|
||||
origmask = aclp->z_ops.ace_mask_get(origacep);
|
||||
acepmask = aclp->z_ops.ace_mask_get(acep);
|
||||
|
||||
user_ace = (!(aceflags &
|
||||
(ACE_OWNER|ACE_GROUP|ACE_IDENTIFIER_GROUP)));
|
||||
|
||||
if (user_ace && (fuid == owner)) {
|
||||
rmask = S_IRUSR;
|
||||
wmask = S_IWUSR;
|
||||
xmask = S_IXUSR;
|
||||
} else {
|
||||
rmask = S_IRGRP;
|
||||
wmask = S_IWGRP;
|
||||
xmask = S_IXGRP;
|
||||
}
|
||||
|
||||
if (origmask & ACE_READ_DATA) {
|
||||
if (mode & rmask) {
|
||||
acepmask &= ~ACE_READ_DATA;
|
||||
} else {
|
||||
acepmask |= ACE_READ_DATA;
|
||||
}
|
||||
}
|
||||
|
||||
if (origmask & ACE_WRITE_DATA) {
|
||||
if (mode & wmask) {
|
||||
acepmask &= ~ACE_WRITE_DATA;
|
||||
} else {
|
||||
acepmask |= ACE_WRITE_DATA;
|
||||
}
|
||||
}
|
||||
|
||||
if (origmask & ACE_APPEND_DATA) {
|
||||
if (mode & wmask) {
|
||||
acepmask &= ~ACE_APPEND_DATA;
|
||||
} else {
|
||||
acepmask |= ACE_APPEND_DATA;
|
||||
}
|
||||
}
|
||||
|
||||
if (origmask & ACE_EXECUTE) {
|
||||
if (mode & xmask) {
|
||||
acepmask &= ~ACE_EXECUTE;
|
||||
} else {
|
||||
acepmask |= ACE_EXECUTE;
|
||||
}
|
||||
}
|
||||
aclp->z_ops.ace_mask_set(acep, acepmask);
|
||||
}
|
||||
|
||||
static void
|
||||
zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
|
||||
zfs_acl_chmod(vtype_t vtype, uint64_t mode, boolean_t trim, zfs_acl_t *aclp)
|
||||
{
|
||||
void *acep = NULL;
|
||||
uint64_t who;
|
||||
@ -1410,30 +1344,31 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
|
||||
zfs_acl_node_t *newnode;
|
||||
size_t abstract_size = aclp->z_ops.ace_abstract_size();
|
||||
void *zacep;
|
||||
uint32_t owner, group, everyone;
|
||||
uint32_t deny1, deny2, allow0;
|
||||
boolean_t isdir;
|
||||
trivial_acl_t masks;
|
||||
|
||||
new_count = new_bytes = 0;
|
||||
|
||||
acl_trivial_access_masks((mode_t)mode, &allow0, &deny1, &deny2,
|
||||
&owner, &group, &everyone);
|
||||
isdir = (vtype == VDIR);
|
||||
|
||||
acl_trivial_access_masks((mode_t)mode, isdir, &masks);
|
||||
|
||||
newnode = zfs_acl_node_alloc((abstract_size * 6) + aclp->z_acl_bytes);
|
||||
|
||||
zacep = newnode->z_acldata;
|
||||
if (allow0) {
|
||||
zfs_set_ace(aclp, zacep, allow0, ALLOW, -1, ACE_OWNER);
|
||||
if (masks.allow0) {
|
||||
zfs_set_ace(aclp, zacep, masks.allow0, ALLOW, -1, ACE_OWNER);
|
||||
zacep = (void *)((uintptr_t)zacep + abstract_size);
|
||||
new_count++;
|
||||
new_bytes += abstract_size;
|
||||
} if (deny1) {
|
||||
zfs_set_ace(aclp, zacep, deny1, DENY, -1, ACE_OWNER);
|
||||
} if (masks.deny1) {
|
||||
zfs_set_ace(aclp, zacep, masks.deny1, DENY, -1, ACE_OWNER);
|
||||
zacep = (void *)((uintptr_t)zacep + abstract_size);
|
||||
new_count++;
|
||||
new_bytes += abstract_size;
|
||||
}
|
||||
if (deny2) {
|
||||
zfs_set_ace(aclp, zacep, deny2, DENY, -1, OWNING_GROUP);
|
||||
if (masks.deny2) {
|
||||
zfs_set_ace(aclp, zacep, masks.deny2, DENY, -1, OWNING_GROUP);
|
||||
zacep = (void *)((uintptr_t)zacep + abstract_size);
|
||||
new_count++;
|
||||
new_bytes += abstract_size;
|
||||
@ -1452,10 +1387,17 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this ACL has any inheritable ACEs, mark that in
|
||||
* the hints (which are later masked into the pflags)
|
||||
* so create knows to do inheritance.
|
||||
*/
|
||||
if (isdir && (inherit_flags &
|
||||
(ACE_FILE_INHERIT_ACE|ACE_DIRECTORY_INHERIT_ACE)))
|
||||
aclp->z_hints |= ZFS_INHERIT_ACE;
|
||||
|
||||
if ((type != ALLOW && type != DENY) ||
|
||||
(inherit_flags & ACE_INHERIT_ONLY_ACE)) {
|
||||
if (inherit_flags)
|
||||
aclp->z_hints |= ZFS_INHERIT_ACE;
|
||||
switch (type) {
|
||||
case ACE_ACCESS_ALLOWED_OBJECT_ACE_TYPE:
|
||||
case ACE_ACCESS_DENIED_OBJECT_ACE_TYPE:
|
||||
@ -1468,20 +1410,13 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
|
||||
|
||||
/*
|
||||
* Limit permissions to be no greater than
|
||||
* group permissions
|
||||
* group permissions.
|
||||
* The "aclinherit" and "aclmode" properties
|
||||
* affect policy for create and chmod(2),
|
||||
* respectively.
|
||||
*/
|
||||
if (zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED) {
|
||||
if (!(mode & S_IRGRP))
|
||||
access_mask &= ~ACE_READ_DATA;
|
||||
if (!(mode & S_IWGRP))
|
||||
access_mask &=
|
||||
~(ACE_WRITE_DATA|ACE_APPEND_DATA);
|
||||
if (!(mode & S_IXGRP))
|
||||
access_mask &= ~ACE_EXECUTE;
|
||||
access_mask &=
|
||||
~(ACE_WRITE_OWNER|ACE_WRITE_ACL|
|
||||
ACE_WRITE_ATTRIBUTES|ACE_WRITE_NAMED_ATTRS);
|
||||
}
|
||||
if ((type == ALLOW) && trim)
|
||||
access_mask &= masks.group;
|
||||
}
|
||||
zfs_set_ace(aclp, zacep, access_mask, type, who, iflags);
|
||||
ace_size = aclp->z_ops.ace_size(acep);
|
||||
@ -1489,11 +1424,11 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
|
||||
new_count++;
|
||||
new_bytes += ace_size;
|
||||
}
|
||||
zfs_set_ace(aclp, zacep, owner, 0, -1, ACE_OWNER);
|
||||
zfs_set_ace(aclp, zacep, masks.owner, 0, -1, ACE_OWNER);
|
||||
zacep = (void *)((uintptr_t)zacep + abstract_size);
|
||||
zfs_set_ace(aclp, zacep, group, 0, -1, OWNING_GROUP);
|
||||
zfs_set_ace(aclp, zacep, masks.group, 0, -1, OWNING_GROUP);
|
||||
zacep = (void *)((uintptr_t)zacep + abstract_size);
|
||||
zfs_set_ace(aclp, zacep, everyone, 0, -1, ACE_EVERYONE);
|
||||
zfs_set_ace(aclp, zacep, masks.everyone, 0, -1, ACE_EVERYONE);
|
||||
|
||||
new_count += 3;
|
||||
new_bytes += abstract_size * 3;
|
||||
@ -1505,17 +1440,27 @@ zfs_acl_chmod(zfsvfs_t *zfsvfs, uint64_t mode, zfs_acl_t *aclp)
|
||||
list_insert_tail(&aclp->z_acl, newnode);
|
||||
}
|
||||
|
||||
void
|
||||
int
|
||||
zfs_acl_chmod_setattr(znode_t *zp, zfs_acl_t **aclp, uint64_t mode)
|
||||
{
|
||||
int error = 0;
|
||||
|
||||
mutex_enter(&zp->z_acl_lock);
|
||||
mutex_enter(&zp->z_lock);
|
||||
*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
|
||||
(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
|
||||
zfs_acl_chmod(zp->z_zfsvfs, mode, *aclp);
|
||||
if (zp->z_zfsvfs->z_acl_mode == ZFS_ACL_DISCARD)
|
||||
*aclp = zfs_acl_alloc(zfs_acl_version_zp(zp));
|
||||
else
|
||||
error = zfs_acl_node_read(zp, B_TRUE, aclp, B_TRUE);
|
||||
|
||||
if (error == 0) {
|
||||
(*aclp)->z_hints = zp->z_pflags & V4_ACL_WIDE_FLAGS;
|
||||
zfs_acl_chmod(ZTOV(zp)->v_type, mode,
|
||||
(zp->z_zfsvfs->z_acl_mode == ZFS_ACL_GROUPMASK), *aclp);
|
||||
}
|
||||
mutex_exit(&zp->z_lock);
|
||||
mutex_exit(&zp->z_acl_lock);
|
||||
ASSERT(*aclp);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1763,8 +1708,8 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
|
||||
if (acl_ids->z_aclp == NULL) {
|
||||
mutex_enter(&dzp->z_acl_lock);
|
||||
mutex_enter(&dzp->z_lock);
|
||||
if (!(flag & IS_ROOT_NODE) && (ZTOV(dzp)->v_type == VDIR &&
|
||||
(dzp->z_pflags & ZFS_INHERIT_ACE)) &&
|
||||
if (!(flag & IS_ROOT_NODE) &&
|
||||
(dzp->z_pflags & ZFS_INHERIT_ACE) &&
|
||||
!(dzp->z_pflags & ZFS_XATTR)) {
|
||||
VERIFY(0 == zfs_acl_node_read(dzp, B_TRUE,
|
||||
&paclp, B_FALSE));
|
||||
@ -1781,7 +1726,9 @@ zfs_acl_ids_create(znode_t *dzp, int flag, vattr_t *vap, cred_t *cr,
|
||||
if (need_chmod) {
|
||||
acl_ids->z_aclp->z_hints |= (vap->va_type == VDIR) ?
|
||||
ZFS_ACL_AUTO_INHERIT : 0;
|
||||
zfs_acl_chmod(zfsvfs, acl_ids->z_mode, acl_ids->z_aclp);
|
||||
zfs_acl_chmod(vap->va_type, acl_ids->z_mode,
|
||||
(zfsvfs->z_acl_inherit == ZFS_ACL_RESTRICTED),
|
||||
acl_ids->z_aclp);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -23,6 +23,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/spa.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/vdev.h>
|
||||
@ -709,6 +713,10 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
|
||||
|
||||
if (report->zcr_ereport == NULL) {
|
||||
report->zcr_free(report->zcr_cbdata, report->zcr_cbinfo);
|
||||
if (report->zcr_ckinfo != NULL) {
|
||||
kmem_free(report->zcr_ckinfo,
|
||||
sizeof (*report->zcr_ckinfo));
|
||||
}
|
||||
kmem_free(report, sizeof (*report));
|
||||
return;
|
||||
}
|
||||
|
@ -18,8 +18,13 @@
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Portions Copyright 2011 Martin Matuska
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@ -48,6 +53,7 @@
|
||||
#include <sys/dsl_prop.h>
|
||||
#include <sys/dsl_deleg.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/dmu_impl.h>
|
||||
#include <sys/ddi.h>
|
||||
#include <sys/sunddi.h>
|
||||
#include <sys/sunldi.h>
|
||||
@ -347,17 +353,37 @@ zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
|
||||
return (zfs_dozonecheck_impl(dataset, zoned, cr));
|
||||
}
|
||||
|
||||
/*
|
||||
* If name ends in a '@', then require recursive permissions.
|
||||
*/
|
||||
int
|
||||
zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
|
||||
{
|
||||
int error;
|
||||
boolean_t descendent = B_FALSE;
|
||||
dsl_dataset_t *ds;
|
||||
char *at;
|
||||
|
||||
error = zfs_dozonecheck(name, cr);
|
||||
at = strchr(name, '@');
|
||||
if (at != NULL && at[1] == '\0') {
|
||||
*at = '\0';
|
||||
descendent = B_TRUE;
|
||||
}
|
||||
|
||||
error = dsl_dataset_hold(name, FTAG, &ds);
|
||||
if (at != NULL)
|
||||
*at = '@';
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
error = zfs_dozonecheck_ds(name, ds, cr);
|
||||
if (error == 0) {
|
||||
error = secpolicy_zfs(cr);
|
||||
if (error)
|
||||
error = dsl_deleg_access(name, perm, cr);
|
||||
error = dsl_deleg_access_impl(ds, descendent, perm, cr);
|
||||
}
|
||||
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -371,7 +397,7 @@ zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
|
||||
if (error == 0) {
|
||||
error = secpolicy_zfs(cr);
|
||||
if (error)
|
||||
error = dsl_deleg_access_impl(ds, perm, cr);
|
||||
error = dsl_deleg_access_impl(ds, B_FALSE, perm, cr);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
@ -678,21 +704,14 @@ zfs_secpolicy_destroy(zfs_cmd_t *zc, cred_t *cr)
|
||||
/*
|
||||
* Destroying snapshots with delegated permissions requires
|
||||
* descendent mount and destroy permissions.
|
||||
* Reassemble the full filesystem@snap name so dsl_deleg_access()
|
||||
* can do the correct permission check.
|
||||
*
|
||||
* Since this routine is used when doing a recursive destroy of snapshots
|
||||
* and destroying snapshots requires descendent permissions, a successful
|
||||
* check of the top level snapshot applies to snapshots of all descendent
|
||||
* datasets as well.
|
||||
*/
|
||||
static int
|
||||
zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, cred_t *cr)
|
||||
zfs_secpolicy_destroy_recursive(zfs_cmd_t *zc, cred_t *cr)
|
||||
{
|
||||
int error;
|
||||
char *dsname;
|
||||
|
||||
dsname = kmem_asprintf("%s@%s", zc->zc_name, zc->zc_value);
|
||||
dsname = kmem_asprintf("%s@", zc->zc_name);
|
||||
|
||||
error = zfs_secpolicy_destroy_perms(dsname, cr);
|
||||
|
||||
@ -1116,6 +1135,8 @@ getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
|
||||
/*
|
||||
* Find a zfsvfs_t for a mounted filesystem, or create our own, in which
|
||||
* case its z_vfs will be NULL, and it will be opened as the owner.
|
||||
* If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
|
||||
* which prevents all vnode ops from running.
|
||||
*/
|
||||
static int
|
||||
zfsvfs_hold(const char *name, void *tag, zfsvfs_t **zfvp, boolean_t writer)
|
||||
@ -1179,7 +1200,7 @@ zfs_ioc_pool_create(zfs_cmd_t *zc)
|
||||
|
||||
(void) nvlist_lookup_uint64(props,
|
||||
zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
|
||||
if (version < SPA_VERSION_INITIAL || version > SPA_VERSION) {
|
||||
if (!SPA_VERSION_IS_SUPPORTED(version)) {
|
||||
error = EINVAL;
|
||||
goto pool_props_bad;
|
||||
}
|
||||
@ -1303,6 +1324,15 @@ zfs_ioc_pool_configs(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of the pool
|
||||
*
|
||||
* outputs:
|
||||
* zc_cookie real errno
|
||||
* zc_nvlist_dst config nvlist
|
||||
* zc_nvlist_dst_size size of config nvlist
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_pool_stats(zfs_cmd_t *zc)
|
||||
{
|
||||
@ -1404,7 +1434,8 @@ zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
|
||||
if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
|
||||
return (error);
|
||||
|
||||
if (zc->zc_cookie < spa_version(spa) || zc->zc_cookie > SPA_VERSION) {
|
||||
if (zc->zc_cookie < spa_version(spa) ||
|
||||
!SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
|
||||
spa_close(spa, FTAG);
|
||||
return (EINVAL);
|
||||
}
|
||||
@ -1447,6 +1478,20 @@ zfs_ioc_pool_get_history(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_pool_reguid(zfs_cmd_t *zc)
|
||||
{
|
||||
spa_t *spa;
|
||||
int error;
|
||||
|
||||
error = spa_open(zc->zc_name, &spa, FTAG);
|
||||
if (error == 0) {
|
||||
error = spa_change_guid(spa);
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
|
||||
{
|
||||
@ -1744,9 +1789,12 @@ zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
|
||||
* inconsistent. So this is a bit of a workaround...
|
||||
* XXX reading with out owning
|
||||
*/
|
||||
if (!zc->zc_objset_stats.dds_inconsistent) {
|
||||
if (dmu_objset_type(os) == DMU_OST_ZVOL)
|
||||
VERIFY(zvol_get_stats(os, nv) == 0);
|
||||
if (!zc->zc_objset_stats.dds_inconsistent &&
|
||||
dmu_objset_type(os) == DMU_OST_ZVOL) {
|
||||
error = zvol_get_stats(os, nv);
|
||||
if (error == EIO)
|
||||
return (error);
|
||||
VERIFY3S(error, ==, 0);
|
||||
}
|
||||
error = put_nvlist(zc, nv);
|
||||
nvlist_free(nv);
|
||||
@ -1943,8 +1991,10 @@ top:
|
||||
uint64_t cookie = 0;
|
||||
int len = sizeof (zc->zc_name) - (p - zc->zc_name);
|
||||
|
||||
while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0)
|
||||
(void) dmu_objset_prefetch(p, NULL);
|
||||
while (dmu_dir_list_next(os, len, p, NULL, &cookie) == 0) {
|
||||
if (!dataset_name_hidden(zc->zc_name))
|
||||
(void) dmu_objset_prefetch(zc->zc_name, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
@ -1953,8 +2003,7 @@ top:
|
||||
NULL, &zc->zc_cookie);
|
||||
if (error == ENOENT)
|
||||
error = ESRCH;
|
||||
} while (error == 0 && dataset_name_hidden(zc->zc_name) &&
|
||||
!(zc->zc_iflags & FKIOCTL));
|
||||
} while (error == 0 && dataset_name_hidden(zc->zc_name));
|
||||
dmu_objset_rele(os, FTAG);
|
||||
|
||||
/*
|
||||
@ -2232,6 +2281,8 @@ retry:
|
||||
if (nvpair_type(propval) !=
|
||||
DATA_TYPE_UINT64_ARRAY)
|
||||
err = EINVAL;
|
||||
} else {
|
||||
err = EINVAL;
|
||||
}
|
||||
} else if (err == 0) {
|
||||
if (nvpair_type(propval) == DATA_TYPE_STRING) {
|
||||
@ -3098,25 +3149,45 @@ zfs_unmount_snap(const char *name, void *arg)
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
* zc_value short name of snapshot
|
||||
* zc_name name of filesystem, snaps must be under it
|
||||
* zc_nvlist_src[_size] full names of snapshots to destroy
|
||||
* zc_defer_destroy mark for deferred destroy
|
||||
*
|
||||
* outputs: none
|
||||
* outputs:
|
||||
* zc_name on failure, name of failed snapshot
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_destroy_snaps(zfs_cmd_t *zc)
|
||||
zfs_ioc_destroy_snaps_nvl(zfs_cmd_t *zc)
|
||||
{
|
||||
int err;
|
||||
int err, len;
|
||||
nvlist_t *nvl;
|
||||
nvpair_t *pair;
|
||||
|
||||
if (snapshot_namecheck(zc->zc_value, NULL, NULL) != 0)
|
||||
return (EINVAL);
|
||||
err = dmu_objset_find(zc->zc_name,
|
||||
zfs_unmount_snap, zc->zc_value, DS_FIND_CHILDREN);
|
||||
if (err)
|
||||
if ((err = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
|
||||
zc->zc_iflags, &nvl)) != 0)
|
||||
return (err);
|
||||
return (dmu_snapshots_destroy(zc->zc_name, zc->zc_value,
|
||||
zc->zc_defer_destroy));
|
||||
|
||||
len = strlen(zc->zc_name);
|
||||
for (pair = nvlist_next_nvpair(nvl, NULL); pair != NULL;
|
||||
pair = nvlist_next_nvpair(nvl, pair)) {
|
||||
const char *name = nvpair_name(pair);
|
||||
/*
|
||||
* The snap name must be underneath the zc_name. This ensures
|
||||
* that our permission checks were legitimate.
|
||||
*/
|
||||
if (strncmp(zc->zc_name, name, len) != 0 ||
|
||||
(name[len] != '@' && name[len] != '/')) {
|
||||
nvlist_free(nvl);
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
(void) zfs_unmount_snap(name, NULL);
|
||||
}
|
||||
|
||||
err = dmu_snapshots_destroy_nvl(nvl, zc->zc_defer_destroy,
|
||||
zc->zc_name);
|
||||
nvlist_free(nvl);
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3759,6 +3830,8 @@ out:
|
||||
* zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
|
||||
* zc_sendobj objsetid of snapshot to send
|
||||
* zc_fromobj objsetid of incremental fromsnap (may be zero)
|
||||
* zc_guid if set, estimate size of stream only. zc_cookie is ignored.
|
||||
* output size in zc_objset_type.
|
||||
*
|
||||
* outputs: none
|
||||
*/
|
||||
@ -3767,13 +3840,13 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
{
|
||||
objset_t *fromsnap = NULL;
|
||||
objset_t *tosnap;
|
||||
file_t *fp;
|
||||
int error;
|
||||
offset_t off;
|
||||
dsl_dataset_t *ds;
|
||||
dsl_dataset_t *dsfrom = NULL;
|
||||
spa_t *spa;
|
||||
dsl_pool_t *dp;
|
||||
boolean_t estimate = (zc->zc_guid != 0);
|
||||
|
||||
error = spa_open(zc->zc_name, &spa, FTAG);
|
||||
if (error)
|
||||
@ -3814,26 +3887,75 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
|
||||
fp = getf(zc->zc_cookie);
|
||||
if (fp == NULL) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
if (dsfrom)
|
||||
dsl_dataset_rele(dsfrom, FTAG);
|
||||
return (EBADF);
|
||||
if (estimate) {
|
||||
error = dmu_send_estimate(tosnap, fromsnap, zc->zc_obj,
|
||||
&zc->zc_objset_type);
|
||||
} else {
|
||||
file_t *fp = getf(zc->zc_cookie);
|
||||
if (fp == NULL) {
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
if (dsfrom)
|
||||
dsl_dataset_rele(dsfrom, FTAG);
|
||||
return (EBADF);
|
||||
}
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send(tosnap, fromsnap, zc->zc_obj,
|
||||
zc->zc_cookie, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
releasef(zc->zc_cookie);
|
||||
}
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_sendbackup(tosnap, fromsnap, zc->zc_obj, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
releasef(zc->zc_cookie);
|
||||
if (dsfrom)
|
||||
dsl_dataset_rele(dsfrom, FTAG);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of snapshot on which to report progress
|
||||
* zc_cookie file descriptor of send stream
|
||||
*
|
||||
* outputs:
|
||||
* zc_cookie number of bytes written in send stream thus far
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_send_progress(zfs_cmd_t *zc)
|
||||
{
|
||||
dsl_dataset_t *ds;
|
||||
dmu_sendarg_t *dsp = NULL;
|
||||
int error;
|
||||
|
||||
if ((error = dsl_dataset_hold(zc->zc_name, FTAG, &ds)) != 0)
|
||||
return (error);
|
||||
|
||||
mutex_enter(&ds->ds_sendstream_lock);
|
||||
|
||||
/*
|
||||
* Iterate over all the send streams currently active on this dataset.
|
||||
* If there's one which matches the specified file descriptor _and_ the
|
||||
* stream was started by the current process, return the progress of
|
||||
* that stream.
|
||||
*/
|
||||
for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
|
||||
dsp = list_next(&ds->ds_sendstreams, dsp)) {
|
||||
if (dsp->dsa_outfd == zc->zc_cookie &&
|
||||
dsp->dsa_proc == curproc)
|
||||
break;
|
||||
}
|
||||
|
||||
if (dsp != NULL)
|
||||
zc->zc_cookie = *(dsp->dsa_off);
|
||||
else
|
||||
error = ENOENT;
|
||||
|
||||
mutex_exit(&ds->ds_sendstream_lock);
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_inject_fault(zfs_cmd_t *zc)
|
||||
{
|
||||
@ -3968,6 +4090,22 @@ zfs_ioc_clear(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_pool_reopen(zfs_cmd_t *zc)
|
||||
{
|
||||
spa_t *spa;
|
||||
int error;
|
||||
|
||||
error = spa_open(zc->zc_name, &spa, FTAG);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
spa_vdev_state_enter(spa, SCL_NONE);
|
||||
vdev_reopen(spa->spa_root_vdev);
|
||||
(void) spa_vdev_state_exit(spa, NULL, 0);
|
||||
spa_close(spa, FTAG);
|
||||
return (0);
|
||||
}
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of filesystem
|
||||
@ -4621,6 +4759,70 @@ zfs_ioc_get_holds(zfs_cmd_t *zc)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name name of new filesystem or snapshot
|
||||
* zc_value full name of old snapshot
|
||||
*
|
||||
* outputs:
|
||||
* zc_cookie space in bytes
|
||||
* zc_objset_type compressed space in bytes
|
||||
* zc_perm_action uncompressed space in bytes
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_space_written(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
dsl_dataset_t *new, *old;
|
||||
|
||||
error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
|
||||
if (error != 0) {
|
||||
dsl_dataset_rele(new, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
error = dsl_dataset_space_written(old, new, &zc->zc_cookie,
|
||||
&zc->zc_objset_type, &zc->zc_perm_action);
|
||||
dsl_dataset_rele(old, FTAG);
|
||||
dsl_dataset_rele(new, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* inputs:
|
||||
* zc_name full name of last snapshot
|
||||
* zc_value full name of first snapshot
|
||||
*
|
||||
* outputs:
|
||||
* zc_cookie space in bytes
|
||||
* zc_objset_type compressed space in bytes
|
||||
* zc_perm_action uncompressed space in bytes
|
||||
*/
|
||||
static int
|
||||
zfs_ioc_space_snaps(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
dsl_dataset_t *new, *old;
|
||||
|
||||
error = dsl_dataset_hold(zc->zc_name, FTAG, &new);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
error = dsl_dataset_hold(zc->zc_value, FTAG, &old);
|
||||
if (error != 0) {
|
||||
dsl_dataset_rele(new, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
error = dsl_dataset_space_wouldfree(old, new, &zc->zc_cookie,
|
||||
&zc->zc_objset_type, &zc->zc_perm_action);
|
||||
dsl_dataset_rele(old, FTAG);
|
||||
dsl_dataset_rele(new, FTAG);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* pool create, destroy, and export don't log the history as part of
|
||||
* zfsdev_ioctl, but rather zfs_ioc_pool_create, and zfs_ioc_pool_export
|
||||
@ -4683,7 +4885,7 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_recv, zfs_secpolicy_receive, DATASET_NAME, B_TRUE,
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_TRUE,
|
||||
{ zfs_ioc_send, zfs_secpolicy_send, DATASET_NAME, B_FALSE,
|
||||
POOL_CHECK_NONE },
|
||||
{ zfs_ioc_inject_fault, zfs_secpolicy_inject, NO_NAME, B_FALSE,
|
||||
POOL_CHECK_NONE },
|
||||
@ -4697,8 +4899,6 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
|
||||
POOL_CHECK_NONE },
|
||||
{ zfs_ioc_promote, zfs_secpolicy_promote, DATASET_NAME, B_TRUE,
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, DATASET_NAME,
|
||||
B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_snapshot, zfs_secpolicy_snapshot, DATASET_NAME, B_TRUE,
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_dsobj_to_dsname, zfs_secpolicy_diff, POOL_NAME, B_FALSE,
|
||||
@ -4742,7 +4942,19 @@ static zfs_ioc_vec_t zfs_ioc_vec[] = {
|
||||
{ zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot, DATASET_NAME,
|
||||
B_FALSE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_obj_to_stats, zfs_secpolicy_diff, DATASET_NAME, B_FALSE,
|
||||
POOL_CHECK_SUSPENDED }
|
||||
POOL_CHECK_SUSPENDED },
|
||||
{ zfs_ioc_space_written, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
|
||||
POOL_CHECK_SUSPENDED },
|
||||
{ zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
|
||||
POOL_CHECK_SUSPENDED },
|
||||
{ zfs_ioc_destroy_snaps_nvl, zfs_secpolicy_destroy_recursive,
|
||||
DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_pool_reguid, zfs_secpolicy_config, POOL_NAME, B_TRUE,
|
||||
POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY },
|
||||
{ zfs_ioc_pool_reopen, zfs_secpolicy_config, POOL_NAME, B_TRUE,
|
||||
POOL_CHECK_SUSPENDED },
|
||||
{ zfs_ioc_send_progress, zfs_secpolicy_read, DATASET_NAME, B_FALSE,
|
||||
POOL_CHECK_NONE }
|
||||
};
|
||||
|
||||
int
|
||||
|
@ -383,6 +383,14 @@ vscan_changed_cb(void *arg, uint64_t newval)
|
||||
zfsvfs->z_vscan = newval;
|
||||
}
|
||||
|
||||
static void
|
||||
acl_mode_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = arg;
|
||||
|
||||
zfsvfs->z_acl_mode = newval;
|
||||
}
|
||||
|
||||
static void
|
||||
acl_inherit_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
@ -513,6 +521,8 @@ zfs_register_callbacks(vfs_t *vfsp)
|
||||
"exec", exec_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
"snapdir", snapdir_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
"aclmode", acl_mode_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
"aclinherit", acl_inherit_changed_cb, zfsvfs);
|
||||
error = error ? error : dsl_prop_register(ds,
|
||||
@ -554,6 +564,7 @@ unregister:
|
||||
(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
|
||||
zfsvfs);
|
||||
(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
|
||||
@ -1236,6 +1247,9 @@ zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
|
||||
VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
|
||||
zfsvfs) == 0);
|
||||
|
||||
VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
|
||||
zfsvfs) == 0);
|
||||
|
||||
VERIFY(dsl_prop_unregister(ds, "aclinherit",
|
||||
acl_inherit_changed_cb, zfsvfs) == 0);
|
||||
|
||||
|
@ -2975,7 +2975,8 @@ top:
|
||||
uint64_t acl_obj;
|
||||
new_mode = (pmode & S_IFMT) | (vap->va_mode & ~S_IFMT);
|
||||
|
||||
zfs_acl_chmod_setattr(zp, &aclp, new_mode);
|
||||
if (err = zfs_acl_chmod_setattr(zp, &aclp, new_mode))
|
||||
goto out;
|
||||
|
||||
mutex_enter(&zp->z_lock);
|
||||
if (!zp->z_is_sa && ((acl_obj = zfs_external_acl(zp)) != 0)) {
|
||||
@ -4192,6 +4193,14 @@ zfs_putpage(vnode_t *vp, offset_t off, size_t len, int flags, cred_t *cr,
|
||||
ZFS_ENTER(zfsvfs);
|
||||
ZFS_VERIFY_ZP(zp);
|
||||
|
||||
/*
|
||||
* There's nothing to do if no data is cached.
|
||||
*/
|
||||
if (!vn_has_cached_data(vp)) {
|
||||
ZFS_EXIT(zfsvfs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Align this request to the file block size in case we kluster.
|
||||
* XXX - this can result in pretty aggressive locking, which can
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
@ -560,7 +561,7 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
|
||||
|
||||
if (!list_is_empty(&zilog->zl_lwb_list)) {
|
||||
ASSERT(zh->zh_claim_txg == 0);
|
||||
ASSERT(!keep_first);
|
||||
VERIFY(!keep_first);
|
||||
while ((lwb = list_head(&zilog->zl_lwb_list)) != NULL) {
|
||||
list_remove(&zilog->zl_lwb_list, lwb);
|
||||
if (lwb->lwb_buf != NULL)
|
||||
@ -1661,20 +1662,9 @@ zil_alloc(objset_t *os, zil_header_t *zh_phys)
|
||||
void
|
||||
zil_free(zilog_t *zilog)
|
||||
{
|
||||
lwb_t *head_lwb;
|
||||
|
||||
zilog->zl_stop_sync = 1;
|
||||
|
||||
/*
|
||||
* After zil_close() there should only be one lwb with a buffer.
|
||||
*/
|
||||
head_lwb = list_head(&zilog->zl_lwb_list);
|
||||
if (head_lwb) {
|
||||
ASSERT(head_lwb == list_tail(&zilog->zl_lwb_list));
|
||||
list_remove(&zilog->zl_lwb_list, head_lwb);
|
||||
zio_buf_free(head_lwb->lwb_buf, head_lwb->lwb_sz);
|
||||
kmem_cache_free(zil_lwb_cache, head_lwb);
|
||||
}
|
||||
ASSERT(list_is_empty(&zilog->zl_lwb_list));
|
||||
list_destroy(&zilog->zl_lwb_list);
|
||||
|
||||
avl_destroy(&zilog->zl_vdev_tree);
|
||||
@ -1714,6 +1704,10 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
|
||||
{
|
||||
zilog_t *zilog = dmu_objset_zil(os);
|
||||
|
||||
ASSERT(zilog->zl_clean_taskq == NULL);
|
||||
ASSERT(zilog->zl_get_data == NULL);
|
||||
ASSERT(list_is_empty(&zilog->zl_lwb_list));
|
||||
|
||||
zilog->zl_get_data = get_data;
|
||||
zilog->zl_clean_taskq = taskq_create("zil_clean", 1, minclsyspri,
|
||||
2, 2, TASKQ_PREPOPULATE);
|
||||
@ -1727,7 +1721,7 @@ zil_open(objset_t *os, zil_get_data_t *get_data)
|
||||
void
|
||||
zil_close(zilog_t *zilog)
|
||||
{
|
||||
lwb_t *tail_lwb;
|
||||
lwb_t *lwb;
|
||||
uint64_t txg = 0;
|
||||
|
||||
zil_commit(zilog, 0); /* commit all itx */
|
||||
@ -1739,9 +1733,9 @@ zil_close(zilog_t *zilog)
|
||||
* destroy the zl_clean_taskq.
|
||||
*/
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
tail_lwb = list_tail(&zilog->zl_lwb_list);
|
||||
if (tail_lwb != NULL)
|
||||
txg = tail_lwb->lwb_max_txg;
|
||||
lwb = list_tail(&zilog->zl_lwb_list);
|
||||
if (lwb != NULL)
|
||||
txg = lwb->lwb_max_txg;
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
if (txg)
|
||||
txg_wait_synced(zilog->zl_dmu_pool, txg);
|
||||
@ -1749,6 +1743,19 @@ zil_close(zilog_t *zilog)
|
||||
taskq_destroy(zilog->zl_clean_taskq);
|
||||
zilog->zl_clean_taskq = NULL;
|
||||
zilog->zl_get_data = NULL;
|
||||
|
||||
/*
|
||||
* We should have only one LWB left on the list; remove it now.
|
||||
*/
|
||||
mutex_enter(&zilog->zl_lock);
|
||||
lwb = list_head(&zilog->zl_lwb_list);
|
||||
if (lwb != NULL) {
|
||||
ASSERT(lwb == list_tail(&zilog->zl_lwb_list));
|
||||
list_remove(&zilog->zl_lwb_list, lwb);
|
||||
zio_buf_free(lwb->lwb_buf, lwb->lwb_sz);
|
||||
kmem_cache_free(zil_lwb_cache, lwb);
|
||||
}
|
||||
mutex_exit(&zilog->zl_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -20,6 +20,8 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -78,6 +80,7 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
|
||||
#ifdef _KERNEL
|
||||
extern vmem_t *zio_alloc_arena;
|
||||
#endif
|
||||
extern int zfs_mg_alloc_failures;
|
||||
|
||||
/*
|
||||
* An allocating zio is one that either currently has the DVA allocate
|
||||
@ -158,6 +161,12 @@ zio_init(void)
|
||||
zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
|
||||
}
|
||||
|
||||
/*
|
||||
* The zio write taskqs have 1 thread per cpu; allow 1/2 of the taskqs
|
||||
* to fail 3 times per txg or 8 failures, whichever is greater.
|
||||
*/
|
||||
zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
|
||||
|
||||
zio_inject_init();
|
||||
}
|
||||
|
||||
@ -610,7 +619,7 @@ zio_write(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp,
|
||||
zp->zp_checksum < ZIO_CHECKSUM_FUNCTIONS &&
|
||||
zp->zp_compress >= ZIO_COMPRESS_OFF &&
|
||||
zp->zp_compress < ZIO_COMPRESS_FUNCTIONS &&
|
||||
zp->zp_type < DMU_OT_NUMTYPES &&
|
||||
DMU_OT_IS_VALID(zp->zp_type) &&
|
||||
zp->zp_level < 32 &&
|
||||
zp->zp_copies > 0 &&
|
||||
zp->zp_copies <= spa_max_replication(spa) &&
|
||||
@ -894,7 +903,7 @@ zio_read_bp_init(zio_t *zio)
|
||||
zio_push_transform(zio, cbuf, psize, psize, zio_decompress);
|
||||
}
|
||||
|
||||
if (!dmu_ot[BP_GET_TYPE(bp)].ot_metadata && BP_GET_LEVEL(bp) == 0)
|
||||
if (!DMU_OT_IS_METADATA(BP_GET_TYPE(bp)) && BP_GET_LEVEL(bp) == 0)
|
||||
zio->io_flags |= ZIO_FLAG_DONT_CACHE;
|
||||
|
||||
if (BP_GET_TYPE(bp) == DMU_OT_DDT_ZAP)
|
||||
@ -1053,7 +1062,7 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
zio_type_t t = zio->io_type;
|
||||
int flags = TQ_SLEEP | (cutinline ? TQ_FRONT : 0);
|
||||
int flags = (cutinline ? TQ_FRONT : 0);
|
||||
|
||||
/*
|
||||
* If we're a config writer or a probe, the normal issue and
|
||||
@ -1077,8 +1086,15 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q, boolean_t cutinline)
|
||||
q++;
|
||||
|
||||
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
|
||||
(void) taskq_dispatch(spa->spa_zio_taskq[t][q],
|
||||
(task_func_t *)zio_execute, zio, flags);
|
||||
|
||||
/*
|
||||
* NB: We are assuming that the zio can only be dispatched
|
||||
* to a single taskq at a time. It would be a grievous error
|
||||
* to dispatch the zio to another taskq at the same time.
|
||||
*/
|
||||
ASSERT(zio->io_tqent.tqent_next == NULL);
|
||||
taskq_dispatch_ent(spa->spa_zio_taskq[t][q],
|
||||
(task_func_t *)zio_execute, zio, flags, &zio->io_tqent);
|
||||
}
|
||||
|
||||
static boolean_t
|
||||
@ -2114,6 +2130,7 @@ zio_dva_allocate(zio_t *zio)
|
||||
metaslab_class_t *mc = spa_normal_class(spa);
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
int error;
|
||||
int flags = 0;
|
||||
|
||||
if (zio->io_gang_leader == NULL) {
|
||||
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
|
||||
@ -2126,10 +2143,21 @@ zio_dva_allocate(zio_t *zio)
|
||||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||
|
||||
/*
|
||||
* The dump device does not support gang blocks so allocation on
|
||||
* behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
|
||||
* the "fast" gang feature.
|
||||
*/
|
||||
flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
|
||||
flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
|
||||
METASLAB_GANG_CHILD : 0;
|
||||
error = metaslab_alloc(spa, mc, zio->io_size, bp,
|
||||
zio->io_prop.zp_copies, zio->io_txg, NULL, 0);
|
||||
zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
|
||||
|
||||
if (error) {
|
||||
spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
|
||||
"size %llu, error %d", spa_name(spa), zio, zio->io_size,
|
||||
error);
|
||||
if (error == ENOSPC && zio->io_size > SPA_MINBLOCKSIZE)
|
||||
return (zio_write_gang_block(zio));
|
||||
zio->io_error = error;
|
||||
@ -2191,13 +2219,22 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
|
||||
|
||||
ASSERT(txg > spa_syncing_txg(spa));
|
||||
|
||||
if (use_slog)
|
||||
/*
|
||||
* ZIL blocks are always contiguous (i.e. not gang blocks) so we
|
||||
* set the METASLAB_GANG_AVOID flag so that they don't "fast gang"
|
||||
* when allocating them.
|
||||
*/
|
||||
if (use_slog) {
|
||||
error = metaslab_alloc(spa, spa_log_class(spa), size,
|
||||
new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
|
||||
new_bp, 1, txg, old_bp,
|
||||
METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
|
||||
}
|
||||
|
||||
if (error)
|
||||
if (error) {
|
||||
error = metaslab_alloc(spa, spa_normal_class(spa), size,
|
||||
new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID);
|
||||
new_bp, 1, txg, old_bp,
|
||||
METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
|
||||
}
|
||||
|
||||
if (error == 0) {
|
||||
BP_SET_LSIZE(new_bp, size);
|
||||
@ -2869,9 +2906,11 @@ zio_done(zio_t *zio)
|
||||
* Reexecution is potentially a huge amount of work.
|
||||
* Hand it off to the otherwise-unused claim taskq.
|
||||
*/
|
||||
(void) taskq_dispatch(
|
||||
ASSERT(zio->io_tqent.tqent_next == NULL);
|
||||
(void) taskq_dispatch_ent(
|
||||
spa->spa_zio_taskq[ZIO_TYPE_CLAIM][ZIO_TASKQ_ISSUE],
|
||||
(task_func_t *)zio_reexecute, zio, TQ_SLEEP);
|
||||
(task_func_t *)zio_reexecute, zio, 0,
|
||||
&zio->io_tqent);
|
||||
}
|
||||
return (ZIO_PIPELINE_STOP);
|
||||
}
|
||||
@ -2950,3 +2989,45 @@ static zio_pipe_stage_t *zio_pipeline[] = {
|
||||
zio_checksum_verify,
|
||||
zio_done
|
||||
};
|
||||
|
||||
/* dnp is the dnode for zb1->zb_object */
|
||||
boolean_t
|
||||
zbookmark_is_before(const dnode_phys_t *dnp, const zbookmark_t *zb1,
|
||||
const zbookmark_t *zb2)
|
||||
{
|
||||
uint64_t zb1nextL0, zb2thisobj;
|
||||
|
||||
ASSERT(zb1->zb_objset == zb2->zb_objset);
|
||||
ASSERT(zb2->zb_level == 0);
|
||||
|
||||
/*
|
||||
* A bookmark in the deadlist is considered to be after
|
||||
* everything else.
|
||||
*/
|
||||
if (zb2->zb_object == DMU_DEADLIST_OBJECT)
|
||||
return (B_TRUE);
|
||||
|
||||
/* The objset_phys_t isn't before anything. */
|
||||
if (dnp == NULL)
|
||||
return (B_FALSE);
|
||||
|
||||
zb1nextL0 = (zb1->zb_blkid + 1) <<
|
||||
((zb1->zb_level) * (dnp->dn_indblkshift - SPA_BLKPTRSHIFT));
|
||||
|
||||
zb2thisobj = zb2->zb_object ? zb2->zb_object :
|
||||
zb2->zb_blkid << (DNODE_BLOCK_SHIFT - DNODE_SHIFT);
|
||||
|
||||
if (zb1->zb_object == DMU_META_DNODE_OBJECT) {
|
||||
uint64_t nextobj = zb1nextL0 *
|
||||
(dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT) >> DNODE_SHIFT;
|
||||
return (nextobj <= zb2thisobj);
|
||||
}
|
||||
|
||||
if (zb1->zb_object < zb2thisobj)
|
||||
return (B_TRUE);
|
||||
if (zb1->zb_object > zb2thisobj)
|
||||
return (B_FALSE);
|
||||
if (zb2->zb_object == DMU_META_DNODE_OBJECT)
|
||||
return (B_FALSE);
|
||||
return (zb1nextL0 <= zb2->zb_blkid);
|
||||
}
|
||||
|
@ -20,10 +20,12 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
*
|
||||
* Portions Copyright 2010 Robert Milkowski
|
||||
*
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
|
||||
/*
|
||||
* ZFS volume emulation driver.
|
||||
*
|
||||
@ -341,6 +343,24 @@ zvol_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
|
||||
ASSERT(error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Replay a TX_TRUNCATE ZIL transaction if asked. TX_TRUNCATE is how we
|
||||
* implement DKIOCFREE/free-long-range.
|
||||
*/
|
||||
static int
|
||||
zvol_replay_truncate(zvol_state_t *zv, lr_truncate_t *lr, boolean_t byteswap)
|
||||
{
|
||||
uint64_t offset, length;
|
||||
|
||||
if (byteswap)
|
||||
byteswap_uint64_array(lr, sizeof (*lr));
|
||||
|
||||
offset = lr->lr_offset;
|
||||
length = lr->lr_length;
|
||||
|
||||
return (dmu_free_long_range(zv->zv_objset, ZVOL_OBJ, offset, length));
|
||||
}
|
||||
|
||||
/*
|
||||
* Replay a TX_WRITE ZIL transaction that didn't get committed
|
||||
* after a system failure
|
||||
@ -391,7 +411,7 @@ zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
|
||||
|
||||
/*
|
||||
* Callback vectors for replaying records.
|
||||
* Only TX_WRITE is needed for zvol.
|
||||
* Only TX_WRITE and TX_TRUNCATE are needed for zvol.
|
||||
*/
|
||||
zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
|
||||
zvol_replay_err, /* 0 no such transaction type */
|
||||
@ -404,7 +424,7 @@ zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
|
||||
zvol_replay_err, /* TX_LINK */
|
||||
zvol_replay_err, /* TX_RENAME */
|
||||
zvol_replay_write, /* TX_WRITE */
|
||||
zvol_replay_err, /* TX_TRUNCATE */
|
||||
zvol_replay_truncate, /* TX_TRUNCATE */
|
||||
zvol_replay_err, /* TX_SETATTR */
|
||||
zvol_replay_err, /* TX_ACL */
|
||||
zvol_replay_err, /* TX_CREATE_ACL */
|
||||
@ -1511,8 +1531,33 @@ zvol_log_write_minor(void *minor_hdl, dmu_tx_t *tx, offset_t off, ssize_t resid,
|
||||
* END entry points to allow external callers access to the volume.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Log a DKIOCFREE/free-long-range to the ZIL with TX_TRUNCATE.
|
||||
*/
|
||||
static void
|
||||
zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off, uint64_t len,
|
||||
boolean_t sync)
|
||||
{
|
||||
itx_t *itx;
|
||||
lr_truncate_t *lr;
|
||||
zilog_t *zilog = zv->zv_zilog;
|
||||
|
||||
if (zil_replaying(zilog, tx))
|
||||
return;
|
||||
|
||||
itx = zil_itx_create(TX_TRUNCATE, sizeof (*lr));
|
||||
lr = (lr_truncate_t *)&itx->itx_lr;
|
||||
lr->lr_foid = ZVOL_OBJ;
|
||||
lr->lr_offset = off;
|
||||
lr->lr_length = len;
|
||||
|
||||
itx->itx_sync = sync;
|
||||
zil_itx_assign(zilog, itx, tx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dirtbag ioctls to support mkfs(1M) for UFS filesystems. See dkio(7I).
|
||||
* Also a dirtbag dkio ioctl for unmap/free-block functionality.
|
||||
*/
|
||||
/*ARGSUSED*/
|
||||
int
|
||||
@ -1631,6 +1676,65 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
|
||||
zfs_range_unlock(rl);
|
||||
break;
|
||||
|
||||
case DKIOCFREE:
|
||||
{
|
||||
dkioc_free_t df;
|
||||
dmu_tx_t *tx;
|
||||
|
||||
if (ddi_copyin((void *)arg, &df, sizeof (df), flag)) {
|
||||
error = EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Apply Postel's Law to length-checking. If they overshoot,
|
||||
* just blank out until the end, if there's a need to blank
|
||||
* out anything.
|
||||
*/
|
||||
if (df.df_start >= zv->zv_volsize)
|
||||
break; /* No need to do anything... */
|
||||
if (df.df_start + df.df_length > zv->zv_volsize)
|
||||
df.df_length = DMU_OBJECT_END;
|
||||
|
||||
rl = zfs_range_lock(&zv->zv_znode, df.df_start, df.df_length,
|
||||
RL_WRITER);
|
||||
tx = dmu_tx_create(zv->zv_objset);
|
||||
error = dmu_tx_assign(tx, TXG_WAIT);
|
||||
if (error != 0) {
|
||||
dmu_tx_abort(tx);
|
||||
} else {
|
||||
zvol_log_truncate(zv, tx, df.df_start,
|
||||
df.df_length, B_TRUE);
|
||||
dmu_tx_commit(tx);
|
||||
error = dmu_free_long_range(zv->zv_objset, ZVOL_OBJ,
|
||||
df.df_start, df.df_length);
|
||||
}
|
||||
|
||||
zfs_range_unlock(rl);
|
||||
|
||||
if (error == 0) {
|
||||
/*
|
||||
* If the write-cache is disabled or 'sync' property
|
||||
* is set to 'always' then treat this as a synchronous
|
||||
* operation (i.e. commit to zil).
|
||||
*/
|
||||
if (!(zv->zv_flags & ZVOL_WCE) ||
|
||||
(zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS))
|
||||
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
||||
|
||||
/*
|
||||
* If the caller really wants synchronous writes, and
|
||||
* can't wait for them, don't return until the write
|
||||
* is done.
|
||||
*/
|
||||
if (df.df_flags & DF_WAIT_SYNC) {
|
||||
txg_wait_synced(
|
||||
dmu_objset_pool(zv->zv_objset), 0);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
error = ENOTTY;
|
||||
break;
|
||||
|
@ -79,7 +79,7 @@
|
||||
* URL and SUNW-MSG-ID value to display for fm_panic(), defined below. These
|
||||
* values must be kept in sync with the FMA source code in usr/src/cmd/fm.
|
||||
*/
|
||||
static const char *fm_url = "http://www.sun.com/msg";
|
||||
static const char *fm_url = "http://illumos.org/msg";
|
||||
static const char *fm_msgid = "SUNOS-8000-0G";
|
||||
static char *volatile fm_panicstr = NULL;
|
||||
|
||||
|
@ -27,8 +27,6 @@
|
||||
#ifndef _SYS_CCOMPILE_H
|
||||
#define _SYS_CCOMPILE_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* This file contains definitions designed to enable different compilers
|
||||
* to be used harmoniously on Solaris systems.
|
||||
@ -79,6 +77,27 @@ extern "C" {
|
||||
*/
|
||||
#define __sun_attr___noreturn__ __attribute__((__noreturn__))
|
||||
|
||||
/*
|
||||
* The function is 'extern inline' and expects GNU C89 behaviour, not C99
|
||||
* behaviour.
|
||||
*
|
||||
* Should only be used on 'extern inline' definitions for GCC.
|
||||
*/
|
||||
#if __GNUC_VERSION >= 40200
|
||||
#define __sun_attr___gnu_inline__ __attribute__((__gnu_inline__))
|
||||
#else
|
||||
#define __sun_attr___gnu_inline__
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The function has control flow such that it may return multiple times (in
|
||||
* the manner of setjmp or vfork)
|
||||
*/
|
||||
#if __GNUC_VERSION >= 40100
|
||||
#define __sun_attr___returns_twice__ __attribute__((__returns_twice__))
|
||||
#else
|
||||
#define __sun_attr___returns_twice__
|
||||
#endif
|
||||
|
||||
/*
|
||||
* This is an appropriate label for functions that do not
|
||||
@ -116,10 +135,11 @@ extern "C" {
|
||||
#define __KPRINTFLIKE(__n) __sun_attr__((__KPRINTFLIKE__(__n)))
|
||||
#define __KVPRINTFLIKE(__n) __sun_attr__((__KVPRINTFLIKE__(__n)))
|
||||
#define __NORETURN __sun_attr__((__noreturn__))
|
||||
#define __GNU_INLINE __inline__ __sun_attr__((__gnu_inline__))
|
||||
#define __RETURNS_TWICE __sun_attr__((__returns_twice__))
|
||||
#define __CONST __sun_attr__((__const__))
|
||||
#define __PURE __sun_attr__((__pure__))
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -26,17 +26,19 @@
|
||||
/*
|
||||
* Copyright 2004 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* Copyright 2012 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_CMN_ERR_H
|
||||
#define _SYS_CMN_ERR_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#if defined(_KERNEL) && !defined(_ASM)
|
||||
#include <sys/va_list.h>
|
||||
#endif
|
||||
|
||||
#include <sys/dditypes.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -56,47 +58,40 @@ extern "C" {
|
||||
/*PRINTFLIKE2*/
|
||||
extern void cmn_err(int, const char *, ...)
|
||||
__KPRINTFLIKE(2);
|
||||
#pragma rarely_called(cmn_err)
|
||||
|
||||
extern void vzcmn_err(zoneid_t, int, const char *, __va_list)
|
||||
__KVPRINTFLIKE(3);
|
||||
#pragma rarely_called(vzcmn_err)
|
||||
|
||||
extern void dev_err(dev_info_t *, int, char *, ...)
|
||||
__KPRINTFLIKE(3);
|
||||
|
||||
extern void vcmn_err(int, const char *, __va_list)
|
||||
__KVPRINTFLIKE(2);
|
||||
#pragma rarely_called(vcmn_err)
|
||||
|
||||
/*PRINTFLIKE3*/
|
||||
extern void zcmn_err(zoneid_t, int, const char *, ...)
|
||||
__KPRINTFLIKE(3);
|
||||
#pragma rarely_called(zcmn_err)
|
||||
|
||||
/*PRINTFLIKE1*/
|
||||
extern void printf(const char *, ...)
|
||||
__KPRINTFLIKE(1);
|
||||
#pragma rarely_called(printf)
|
||||
|
||||
extern void vzprintf(zoneid_t, const char *, __va_list)
|
||||
__KVPRINTFLIKE(2);
|
||||
#pragma rarely_called(vzprintf)
|
||||
|
||||
/*PRINTFLIKE2*/
|
||||
extern void zprintf(zoneid_t, const char *, ...)
|
||||
__KPRINTFLIKE(2);
|
||||
#pragma rarely_called(zprintf)
|
||||
|
||||
extern void vprintf(const char *, __va_list)
|
||||
__KVPRINTFLIKE(1);
|
||||
#pragma rarely_called(vprintf)
|
||||
|
||||
/*PRINTFLIKE1*/
|
||||
extern void uprintf(const char *, ...)
|
||||
__KPRINTFLIKE(1);
|
||||
#pragma rarely_called(uprintf)
|
||||
|
||||
extern void vuprintf(const char *, __va_list)
|
||||
__KVPRINTFLIKE(1);
|
||||
#pragma rarely_called(vuprintf)
|
||||
|
||||
/*PRINTFLIKE3*/
|
||||
extern size_t snprintf(char *, size_t, const char *, ...)
|
||||
@ -112,11 +107,9 @@ extern char *vsprintf(char *, const char *, __va_list)
|
||||
/*PRINTFLIKE1*/
|
||||
extern void panic(const char *, ...)
|
||||
__KPRINTFLIKE(1) __NORETURN;
|
||||
#pragma rarely_called(panic)
|
||||
|
||||
extern void vpanic(const char *, __va_list)
|
||||
__KVPRINTFLIKE(1) __NORETURN;
|
||||
#pragma rarely_called(vpanic)
|
||||
|
||||
#endif /* _KERNEL */
|
||||
#endif /* !_ASM */
|
||||
|
@ -24,6 +24,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DTRACE_H
|
||||
#define _SYS_DTRACE_H
|
||||
|
||||
@ -202,6 +206,7 @@ typedef enum dtrace_probespec {
|
||||
#define DIF_VAR_ARGS 0x0000 /* arguments array */
|
||||
#define DIF_VAR_REGS 0x0001 /* registers array */
|
||||
#define DIF_VAR_UREGS 0x0002 /* user registers array */
|
||||
#define DIF_VAR_VMREGS 0x0003 /* virtual machine registers array */
|
||||
#define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */
|
||||
#define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */
|
||||
#define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */
|
||||
@ -280,8 +285,10 @@ typedef enum dtrace_probespec {
|
||||
#define DIF_SUBR_INET_NTOP 41
|
||||
#define DIF_SUBR_INET_NTOA 42
|
||||
#define DIF_SUBR_INET_NTOA6 43
|
||||
#define DIF_SUBR_TOUPPER 44
|
||||
#define DIF_SUBR_TOLOWER 45
|
||||
|
||||
#define DIF_SUBR_MAX 43 /* max subroutine value */
|
||||
#define DIF_SUBR_MAX 45 /* max subroutine value */
|
||||
|
||||
typedef uint32_t dif_instr_t;
|
||||
|
||||
@ -390,6 +397,8 @@ typedef struct dtrace_difv {
|
||||
#define DTRACEACT_PRINTF 3 /* printf() action */
|
||||
#define DTRACEACT_PRINTA 4 /* printa() action */
|
||||
#define DTRACEACT_LIBACT 5 /* library-controlled action */
|
||||
#define DTRACEACT_TRACEMEM 6 /* tracemem() action */
|
||||
#define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */
|
||||
|
||||
#define DTRACEACT_PROC 0x0100
|
||||
#define DTRACEACT_USTACK (DTRACEACT_PROC + 1)
|
||||
@ -455,6 +464,7 @@ typedef struct dtrace_difv {
|
||||
#define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6)
|
||||
#define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7)
|
||||
#define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8)
|
||||
#define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9)
|
||||
|
||||
#define DTRACEACT_ISAGG(x) \
|
||||
(DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION)
|
||||
@ -489,6 +499,31 @@ typedef struct dtrace_difv {
|
||||
(int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \
|
||||
DTRACE_LQUANTIZE_BASESHIFT)
|
||||
|
||||
#define DTRACE_LLQUANTIZE_FACTORSHIFT 48
|
||||
#define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48)
|
||||
#define DTRACE_LLQUANTIZE_LOWSHIFT 32
|
||||
#define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32)
|
||||
#define DTRACE_LLQUANTIZE_HIGHSHIFT 16
|
||||
#define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16)
|
||||
#define DTRACE_LLQUANTIZE_NSTEPSHIFT 0
|
||||
#define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX
|
||||
|
||||
#define DTRACE_LLQUANTIZE_FACTOR(x) \
|
||||
(uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \
|
||||
DTRACE_LLQUANTIZE_FACTORSHIFT)
|
||||
|
||||
#define DTRACE_LLQUANTIZE_LOW(x) \
|
||||
(uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \
|
||||
DTRACE_LLQUANTIZE_LOWSHIFT)
|
||||
|
||||
#define DTRACE_LLQUANTIZE_HIGH(x) \
|
||||
(uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \
|
||||
DTRACE_LLQUANTIZE_HIGHSHIFT)
|
||||
|
||||
#define DTRACE_LLQUANTIZE_NSTEP(x) \
|
||||
(uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \
|
||||
DTRACE_LLQUANTIZE_NSTEPSHIFT)
|
||||
|
||||
#define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX)
|
||||
#define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32)
|
||||
#define DTRACE_USTACK_ARG(x, y) \
|
||||
@ -1321,7 +1356,7 @@ typedef struct dof_helper {
|
||||
* dtps_resume() <-- Resume specified probe
|
||||
* dtps_getargdesc() <-- Get the argument description for args[X]
|
||||
* dtps_getargval() <-- Get the value for an argX or args[X] variable
|
||||
* dtps_usermode() <-- Find out if the probe was fired in user mode
|
||||
* dtps_mode() <-- Return the mode of the fired probe
|
||||
* dtps_destroy() <-- Destroy all state associated with this probe
|
||||
*
|
||||
* 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec)
|
||||
@ -1570,24 +1605,32 @@ typedef struct dof_helper {
|
||||
* This is called from within dtrace_probe() meaning that interrupts
|
||||
* are disabled. No locks should be taken within this entry point.
|
||||
*
|
||||
* 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg)
|
||||
* 1.10 int dtps_mode(void *arg, dtrace_id_t id, void *parg)
|
||||
*
|
||||
* 1.10.1 Overview
|
||||
*
|
||||
* Called to determine if the probe was fired in a user context.
|
||||
* Called to determine the mode of a fired probe.
|
||||
*
|
||||
* 1.10.2 Arguments and notes
|
||||
*
|
||||
* The first argument is the cookie as passed to dtrace_register(). The
|
||||
* second argument is the identifier of the current probe. The third
|
||||
* second argument is the identifier of the current probe. The third
|
||||
* argument is the probe argument as passed to dtrace_probe_create(). This
|
||||
* entry point must not be left NULL for providers whose probes allow for
|
||||
* mixed mode tracing, that is to say those probes that can fire during
|
||||
* kernel- _or_ user-mode execution
|
||||
* mixed mode tracing, that is to say those unanchored probes that can fire
|
||||
* during kernel- or user-mode execution.
|
||||
*
|
||||
* 1.10.3 Return value
|
||||
*
|
||||
* A boolean value.
|
||||
* A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL
|
||||
* or DTRACE_MODE_USER) and the policy when the privilege of the enabling
|
||||
* is insufficient for that mode (either DTRACE_MODE_NOPRIV_DROP or
|
||||
* DTRACE_MODE_NOPRIV_RESTRICT). If the policy is DTRACE_MODE_NOPRIV_DROP,
|
||||
* insufficient privilege will result in the probe firing being silently
|
||||
* ignored for the enabling; if the policy is DTRACE_MODE_NOPRIV_RESTRICT,
|
||||
* insufficient privilege will not prevent probe processing for the
|
||||
* enabling, but restrictions will be in place that induce a UPRIV fault
|
||||
* upon attempt to examine probe arguments or current process state.
|
||||
*
|
||||
* 1.10.4 Caller's context
|
||||
*
|
||||
@ -1978,10 +2021,15 @@ typedef struct dtrace_pops {
|
||||
dtrace_argdesc_t *desc);
|
||||
uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg,
|
||||
int argno, int aframes);
|
||||
int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg);
|
||||
int (*dtps_mode)(void *arg, dtrace_id_t id, void *parg);
|
||||
void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg);
|
||||
} dtrace_pops_t;
|
||||
|
||||
#define DTRACE_MODE_KERNEL 0x01
|
||||
#define DTRACE_MODE_USER 0x02
|
||||
#define DTRACE_MODE_NOPRIV_DROP 0x10
|
||||
#define DTRACE_MODE_NOPRIV_RESTRICT 0x20
|
||||
|
||||
typedef uintptr_t dtrace_provider_id_t;
|
||||
|
||||
extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t,
|
||||
|
@ -24,11 +24,13 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_DTRACE_IMPL_H
|
||||
#define _SYS_DTRACE_IMPL_H
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -419,8 +421,11 @@ typedef struct dtrace_buffer {
|
||||
uint32_t dtb_errors; /* number of errors */
|
||||
uint32_t dtb_xamot_errors; /* errors in inactive buffer */
|
||||
#ifndef _LP64
|
||||
uint64_t dtb_pad1;
|
||||
uint64_t dtb_pad1; /* pad out to 64 bytes */
|
||||
#endif
|
||||
uint64_t dtb_switched; /* time of last switch */
|
||||
uint64_t dtb_interval; /* observed switch interval */
|
||||
uint64_t dtb_pad2[6]; /* pad to avoid false sharing */
|
||||
} dtrace_buffer_t;
|
||||
|
||||
/*
|
||||
@ -924,7 +929,8 @@ typedef struct dtrace_mstate {
|
||||
* Access flag used by dtrace_mstate.dtms_access.
|
||||
*/
|
||||
#define DTRACE_ACCESS_KERNEL 0x1 /* the priv to read kmem */
|
||||
|
||||
#define DTRACE_ACCESS_PROC 0x2 /* the priv for proc state */
|
||||
#define DTRACE_ACCESS_ARGS 0x4 /* the priv to examine args */
|
||||
|
||||
/*
|
||||
* DTrace Activity
|
||||
@ -1139,7 +1145,7 @@ struct dtrace_provider {
|
||||
dtrace_pops_t dtpv_pops; /* provider operations */
|
||||
char *dtpv_name; /* provider name */
|
||||
void *dtpv_arg; /* provider argument */
|
||||
uint_t dtpv_defunct; /* boolean: defunct provider */
|
||||
hrtime_t dtpv_defunct; /* when made defunct */
|
||||
struct dtrace_provider *dtpv_next; /* next provider */
|
||||
};
|
||||
|
||||
@ -1246,6 +1252,7 @@ extern void dtrace_copyoutstr(uintptr_t, uintptr_t, size_t,
|
||||
volatile uint16_t *);
|
||||
extern void dtrace_getpcstack(pc_t *, int, int, uint32_t *);
|
||||
extern ulong_t dtrace_getreg(struct regs *, uint_t);
|
||||
extern uint64_t dtrace_getvmreg(uint_t, volatile uint16_t *);
|
||||
extern int dtrace_getstackdepth(int);
|
||||
extern void dtrace_getupcstack(uint64_t *, int);
|
||||
extern void dtrace_getufpstack(uint64_t *, uint64_t *, int);
|
||||
|
@ -21,6 +21,9 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
/* Portions Copyright 2010 Robert Milkowski */
|
||||
@ -87,7 +90,7 @@ typedef enum {
|
||||
ZFS_PROP_READONLY,
|
||||
ZFS_PROP_ZONED,
|
||||
ZFS_PROP_SNAPDIR,
|
||||
ZFS_PROP_PRIVATE, /* not exposed to user, temporary */
|
||||
ZFS_PROP_ACLMODE,
|
||||
ZFS_PROP_ACLINHERIT,
|
||||
ZFS_PROP_CREATETXG, /* not exposed to the user */
|
||||
ZFS_PROP_NAME, /* not exposed to the user */
|
||||
@ -122,6 +125,9 @@ typedef enum {
|
||||
ZFS_PROP_DEDUP,
|
||||
ZFS_PROP_MLSLABEL,
|
||||
ZFS_PROP_SYNC,
|
||||
ZFS_PROP_REFRATIO,
|
||||
ZFS_PROP_WRITTEN,
|
||||
ZFS_PROP_CLONES,
|
||||
ZFS_NUM_PROPS
|
||||
} zfs_prop_t;
|
||||
|
||||
@ -161,9 +167,15 @@ typedef enum {
|
||||
ZPOOL_PROP_FREE,
|
||||
ZPOOL_PROP_ALLOCATED,
|
||||
ZPOOL_PROP_READONLY,
|
||||
ZPOOL_PROP_COMMENT,
|
||||
ZPOOL_PROP_EXPANDSZ,
|
||||
ZPOOL_PROP_FREEING,
|
||||
ZPOOL_NUM_PROPS
|
||||
} zpool_prop_t;
|
||||
|
||||
/* Small enough to not hog a whole line of printout in zpool(1M). */
|
||||
#define ZPROP_MAX_COMMENT 32
|
||||
|
||||
#define ZPROP_CONT -2
|
||||
#define ZPROP_INVAL -1
|
||||
|
||||
@ -218,6 +230,7 @@ const char *zfs_prop_to_name(zfs_prop_t);
|
||||
zfs_prop_t zfs_name_to_prop(const char *);
|
||||
boolean_t zfs_prop_user(const char *);
|
||||
boolean_t zfs_prop_userquota(const char *);
|
||||
boolean_t zfs_prop_written(const char *);
|
||||
int zfs_prop_index_to_string(zfs_prop_t, uint64_t, const char **);
|
||||
int zfs_prop_string_to_index(zfs_prop_t, const char *, uint64_t *);
|
||||
uint64_t zfs_prop_random_value(zfs_prop_t, uint64_t seed);
|
||||
@ -231,6 +244,8 @@ const char *zpool_prop_to_name(zpool_prop_t);
|
||||
const char *zpool_prop_default_string(zpool_prop_t);
|
||||
uint64_t zpool_prop_default_numeric(zpool_prop_t);
|
||||
boolean_t zpool_prop_readonly(zpool_prop_t);
|
||||
boolean_t zpool_prop_feature(const char *);
|
||||
boolean_t zpool_prop_unsupported(const char *name);
|
||||
int zpool_prop_index_to_string(zpool_prop_t, uint64_t, const char **);
|
||||
int zpool_prop_string_to_index(zpool_prop_t, const char *, uint64_t *);
|
||||
uint64_t zpool_prop_random_value(zpool_prop_t, uint64_t seed);
|
||||
@ -338,6 +353,7 @@ typedef enum {
|
||||
#define SPA_VERSION_26 26ULL
|
||||
#define SPA_VERSION_27 27ULL
|
||||
#define SPA_VERSION_28 28ULL
|
||||
#define SPA_VERSION_5000 5000ULL
|
||||
|
||||
/*
|
||||
* When bumping up SPA_VERSION, make sure GRUB ZFS understands the on-disk
|
||||
@ -345,8 +361,8 @@ typedef enum {
|
||||
* and do the appropriate changes. Also bump the version number in
|
||||
* usr/src/grub/capability.
|
||||
*/
|
||||
#define SPA_VERSION SPA_VERSION_28
|
||||
#define SPA_VERSION_STRING "28"
|
||||
#define SPA_VERSION SPA_VERSION_5000
|
||||
#define SPA_VERSION_STRING "5000"
|
||||
|
||||
/*
|
||||
* Symbolic names for the changes that caused a SPA_VERSION switch.
|
||||
@ -397,6 +413,12 @@ typedef enum {
|
||||
#define SPA_VERSION_DEADLISTS SPA_VERSION_26
|
||||
#define SPA_VERSION_FAST_SNAP SPA_VERSION_27
|
||||
#define SPA_VERSION_MULTI_REPLACE SPA_VERSION_28
|
||||
#define SPA_VERSION_BEFORE_FEATURES SPA_VERSION_28
|
||||
#define SPA_VERSION_FEATURES SPA_VERSION_5000
|
||||
|
||||
/*
 * Evaluates to non-zero when pool version v falls inside one of the two
 * accepted, inclusive ranges:
 *   SPA_VERSION_INITIAL  .. SPA_VERSION_BEFORE_FEATURES, or
 *   SPA_VERSION_FEATURES .. SPA_VERSION.
 * Any value outside both ranges is rejected.  v is expanded four times,
 * so pass an expression without side effects.
 */
#define SPA_VERSION_IS_SUPPORTED(v) \
|
||||
(((v) >= SPA_VERSION_INITIAL && (v) <= SPA_VERSION_BEFORE_FEATURES) || \
|
||||
((v) >= SPA_VERSION_FEATURES && (v) <= SPA_VERSION))
|
||||
|
||||
/*
|
||||
* ZPL version - rev'd whenever an incompatible on-disk format change
|
||||
@ -488,11 +510,17 @@ typedef struct zpool_rewind_policy {
|
||||
#define ZPOOL_CONFIG_SPLIT_LIST "guid_list"
|
||||
#define ZPOOL_CONFIG_REMOVING "removing"
|
||||
#define ZPOOL_CONFIG_RESILVERING "resilvering"
|
||||
#define ZPOOL_CONFIG_COMMENT "comment"
|
||||
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_LOAD_INFO "load_info" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_REWIND_INFO "rewind_info" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_UNSUP_FEAT "unsup_feat" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_CAN_RDONLY "can_rdonly" /* not stored on disk */
|
||||
#define ZPOOL_CONFIG_FEATURES_FOR_READ "features_for_read"
|
||||
#define ZPOOL_CONFIG_FEATURE_STATS "feature_stats" /* not stored on disk */
|
||||
/*
|
||||
* The persistent vdev state is stored as separate values rather than a single
|
||||
* 'vdev_state' entry. This is because a device can be in multiple states, such
|
||||
@ -571,6 +599,7 @@ typedef enum vdev_aux {
|
||||
VDEV_AUX_BAD_LABEL, /* the label is OK but invalid */
|
||||
VDEV_AUX_VERSION_NEWER, /* on-disk version is too new */
|
||||
VDEV_AUX_VERSION_OLDER, /* on-disk version is too old */
|
||||
VDEV_AUX_UNSUP_FEAT, /* unsupported features */
|
||||
VDEV_AUX_SPARED, /* hot spare used in another pool */
|
||||
VDEV_AUX_ERR_EXCEEDED, /* too many errors */
|
||||
VDEV_AUX_IO_FAILURE, /* experienced I/O failure */
|
||||
@ -661,6 +690,7 @@ typedef struct vdev_stat {
|
||||
uint64_t vs_space; /* total capacity */
|
||||
uint64_t vs_dspace; /* deflated capacity */
|
||||
uint64_t vs_rsize; /* replaceable dev size */
|
||||
uint64_t vs_esize; /* expandable dev size */
|
||||
uint64_t vs_ops[ZIO_TYPES]; /* operation count */
|
||||
uint64_t vs_bytes[ZIO_TYPES]; /* bytes read/written */
|
||||
uint64_t vs_read_errors; /* read errors */
|
||||
@ -752,7 +782,6 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_ERROR_LOG,
|
||||
ZFS_IOC_CLEAR,
|
||||
ZFS_IOC_PROMOTE,
|
||||
ZFS_IOC_DESTROY_SNAPS,
|
||||
ZFS_IOC_SNAPSHOT,
|
||||
ZFS_IOC_DSOBJ_TO_DSNAME,
|
||||
ZFS_IOC_OBJ_TO_PATH,
|
||||
@ -774,7 +803,13 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_NEXT_OBJ,
|
||||
ZFS_IOC_DIFF,
|
||||
ZFS_IOC_TMP_SNAPSHOT,
|
||||
ZFS_IOC_OBJ_TO_STATS
|
||||
ZFS_IOC_OBJ_TO_STATS,
|
||||
ZFS_IOC_SPACE_WRITTEN,
|
||||
ZFS_IOC_SPACE_SNAPS,
|
||||
ZFS_IOC_DESTROY_SNAPS_NVL,
|
||||
ZFS_IOC_POOL_REGUID,
|
||||
ZFS_IOC_POOL_REOPEN,
|
||||
ZFS_IOC_SEND_PROGRESS
|
||||
} zfs_ioc_t;
|
||||
|
||||
/*
|
||||
@ -837,6 +872,7 @@ typedef enum {
|
||||
* ESC_ZFS_RESILVER_START
|
||||
* ESC_ZFS_RESILVER_END
|
||||
* ESC_ZFS_POOL_DESTROY
|
||||
* ESC_ZFS_POOL_REGUID
|
||||
*
|
||||
* ZFS_EV_POOL_NAME DATA_TYPE_STRING
|
||||
* ZFS_EV_POOL_GUID DATA_TYPE_UINT64
|
||||
|
@ -20,12 +20,14 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_NVPAIR_H
|
||||
#define _SYS_NVPAIR_H
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/errno.h>
|
||||
#include <sys/va_list.h>
|
||||
|
||||
@ -274,6 +276,73 @@ int nvpair_value_hrtime(nvpair_t *, hrtime_t *);
|
||||
int nvpair_value_double(nvpair_t *, double *);
|
||||
#endif
|
||||
|
||||
nvlist_t *fnvlist_alloc(void);
|
||||
void fnvlist_free(nvlist_t *);
|
||||
size_t fnvlist_size(nvlist_t *);
|
||||
char *fnvlist_pack(nvlist_t *, size_t *);
|
||||
void fnvlist_pack_free(char *, size_t);
|
||||
nvlist_t *fnvlist_unpack(char *, size_t);
|
||||
nvlist_t *fnvlist_dup(nvlist_t *);
|
||||
void fnvlist_merge(nvlist_t *, nvlist_t *);
|
||||
|
||||
void fnvlist_add_boolean(nvlist_t *, const char *);
|
||||
void fnvlist_add_boolean_value(nvlist_t *, const char *, boolean_t);
|
||||
void fnvlist_add_byte(nvlist_t *, const char *, uchar_t);
|
||||
void fnvlist_add_int8(nvlist_t *, const char *, int8_t);
|
||||
void fnvlist_add_uint8(nvlist_t *, const char *, uint8_t);
|
||||
void fnvlist_add_int16(nvlist_t *, const char *, int16_t);
|
||||
void fnvlist_add_uint16(nvlist_t *, const char *, uint16_t);
|
||||
void fnvlist_add_int32(nvlist_t *, const char *, int32_t);
|
||||
void fnvlist_add_uint32(nvlist_t *, const char *, uint32_t);
|
||||
void fnvlist_add_int64(nvlist_t *, const char *, int64_t);
|
||||
void fnvlist_add_uint64(nvlist_t *, const char *, uint64_t);
|
||||
void fnvlist_add_string(nvlist_t *, const char *, const char *);
|
||||
void fnvlist_add_nvlist(nvlist_t *, const char *, nvlist_t *);
|
||||
void fnvlist_add_nvpair(nvlist_t *, nvpair_t *);
|
||||
void fnvlist_add_boolean_array(nvlist_t *, const char *, boolean_t *, uint_t);
|
||||
void fnvlist_add_byte_array(nvlist_t *, const char *, uchar_t *, uint_t);
|
||||
void fnvlist_add_int8_array(nvlist_t *, const char *, int8_t *, uint_t);
|
||||
void fnvlist_add_uint8_array(nvlist_t *, const char *, uint8_t *, uint_t);
|
||||
void fnvlist_add_int16_array(nvlist_t *, const char *, int16_t *, uint_t);
|
||||
void fnvlist_add_uint16_array(nvlist_t *, const char *, uint16_t *, uint_t);
|
||||
void fnvlist_add_int32_array(nvlist_t *, const char *, int32_t *, uint_t);
|
||||
void fnvlist_add_uint32_array(nvlist_t *, const char *, uint32_t *, uint_t);
|
||||
void fnvlist_add_int64_array(nvlist_t *, const char *, int64_t *, uint_t);
|
||||
void fnvlist_add_uint64_array(nvlist_t *, const char *, uint64_t *, uint_t);
|
||||
void fnvlist_add_string_array(nvlist_t *, const char *, char * const *, uint_t);
|
||||
void fnvlist_add_nvlist_array(nvlist_t *, const char *, nvlist_t **, uint_t);
|
||||
|
||||
void fnvlist_remove(nvlist_t *, const char *);
|
||||
void fnvlist_remove_nvpair(nvlist_t *, nvpair_t *);
|
||||
|
||||
nvpair_t *fnvlist_lookup_nvpair(nvlist_t *nvl, const char *name);
|
||||
boolean_t fnvlist_lookup_boolean(nvlist_t *nvl, const char *name);
|
||||
boolean_t fnvlist_lookup_boolean_value(nvlist_t *nvl, const char *name);
|
||||
uchar_t fnvlist_lookup_byte(nvlist_t *nvl, const char *name);
|
||||
int8_t fnvlist_lookup_int8(nvlist_t *nvl, const char *name);
|
||||
int16_t fnvlist_lookup_int16(nvlist_t *nvl, const char *name);
|
||||
int32_t fnvlist_lookup_int32(nvlist_t *nvl, const char *name);
|
||||
int64_t fnvlist_lookup_int64(nvlist_t *nvl, const char *name);
|
||||
/*
 * NOTE(review): this is the only fnvlist_lookup_* declaration here whose
 * name ends in "_t" (compare fnvlist_lookup_int8, fnvlist_lookup_uint16);
 * callers must spell it exactly as declared.
 */
uint8_t fnvlist_lookup_uint8_t(nvlist_t *nvl, const char *name);
|
||||
uint16_t fnvlist_lookup_uint16(nvlist_t *nvl, const char *name);
|
||||
uint32_t fnvlist_lookup_uint32(nvlist_t *nvl, const char *name);
|
||||
uint64_t fnvlist_lookup_uint64(nvlist_t *nvl, const char *name);
|
||||
char *fnvlist_lookup_string(nvlist_t *nvl, const char *name);
|
||||
nvlist_t *fnvlist_lookup_nvlist(nvlist_t *nvl, const char *name);
|
||||
|
||||
boolean_t fnvpair_value_boolean_value(nvpair_t *nvp);
|
||||
uchar_t fnvpair_value_byte(nvpair_t *nvp);
|
||||
int8_t fnvpair_value_int8(nvpair_t *nvp);
|
||||
int16_t fnvpair_value_int16(nvpair_t *nvp);
|
||||
int32_t fnvpair_value_int32(nvpair_t *nvp);
|
||||
int64_t fnvpair_value_int64(nvpair_t *nvp);
|
||||
/*
 * NOTE(review): this is the only fnvpair_value_* declaration here whose
 * name ends in "_t" (compare fnvpair_value_int8, fnvpair_value_uint16);
 * callers must spell it exactly as declared.
 */
uint8_t fnvpair_value_uint8_t(nvpair_t *nvp);
|
||||
uint16_t fnvpair_value_uint16(nvpair_t *nvp);
|
||||
uint32_t fnvpair_value_uint32(nvpair_t *nvp);
|
||||
uint64_t fnvpair_value_uint64(nvpair_t *nvp);
|
||||
char *fnvpair_value_string(nvpair_t *nvp);
|
||||
nvlist_t *fnvpair_value_nvlist(nvpair_t *nvp);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -20,6 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SYSEVENT_EVENTDEFS_H
|
||||
@ -256,6 +257,7 @@ extern "C" {
|
||||
#define ESC_ZFS_SCRUB_FINISH "ESC_ZFS_scrub_finish"
|
||||
#define ESC_ZFS_VDEV_SPARE "ESC_ZFS_vdev_spare"
|
||||
#define ESC_ZFS_BOOTFS_VDEV_ATTACH "ESC_ZFS_bootfs_vdev_attach"
|
||||
#define ESC_ZFS_POOL_REGUID "ESC_ZFS_pool_reguid"
|
||||
|
||||
/*
|
||||
* datalink subclass definitions.
|
||||
|
@ -25,6 +25,8 @@
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*
|
||||
* Copyright 2011, 2012 Nexenta Systems, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SYSMACROS_H
|
||||
@ -364,12 +366,18 @@ extern unsigned char bcd_to_byte[256];
|
||||
#error One of _BIT_FIELDS_LTOH or _BIT_FIELDS_HTOL must be defined
|
||||
#endif /* _BIT_FIELDS_LTOH */
|
||||
|
||||
#if defined(_KERNEL) && !defined(_KMEMUSER) && !defined(offsetof)
|
||||
|
||||
/* avoid any possibility of clashing with <stddef.h> version */
|
||||
#if defined(_KERNEL) && !defined(_KMEMUSER)
|
||||
|
||||
#if !defined(offsetof)
|
||||
/*
 * Fallback offsetof(s, m): the byte offset of member m within type s,
 * obtained by taking the address of m through a null pointer to s and
 * converting that address to size_t.
 */
#define offsetof(s, m) ((size_t)(&(((s *)0)->m)))
|
||||
#endif
|
||||
#endif /* !offsetof */
|
||||
|
||||
/*
 * container_of(m, s, name): m points at the member called `name` inside
 * an object of type s; the result is the address of that enclosing
 * object, as a void *.  The address is computed by subtracting
 * offsetof(s, name) from m using uintptr_t arithmetic.
 *
 * The whole expansion is wrapped in parentheses so that it acts as a
 * single operand wherever it is used (for example after sizeof or next
 * to a postfix operator) rather than as a bare cast expression.
 */
#define container_of(m, s, name) \
	((void *)((uintptr_t)(m) - (uintptr_t)offsetof(s, name)))
|
||||
|
||||
/*
 * ARRAY_SIZE(x): number of elements in the array x, computed as the size
 * of the whole array divided by the size of its first element.  x must
 * be an actual array object; a pointer yields a meaningless quotient.
 *
 * The argument is parenthesized before it is indexed so that arguments
 * that are themselves cast or dereference expressions index the array,
 * not one of their sub-expressions.
 */
#define ARRAY_SIZE(x) (sizeof (x) / sizeof ((x)[0]))
|
||||
#endif /* _KERNEL, !_KMEMUSER */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user