Update vendor/illumos/dist and vendor/illumos-gate/dist

to illumos-gate revision 13782:8f78aae28a63

Obtained from:	ssh://anonhg@hg.illumos.org/illumos-gate
This commit is contained in:
Martin Matuska 2012-08-27 19:25:20 +00:00
parent 5e12fc6c1c
commit d18d85e243
22 changed files with 198 additions and 47 deletions

View File

@ -294,11 +294,9 @@ static int
iter_handlers(int (*func)(int, const char *, zinject_record_t *, void *),
void *data)
{
zfs_cmd_t zc;
zfs_cmd_t zc = { 0 };
int ret;
zc.zc_guid = 0;
while (ioctl(zfs_fd, ZFS_IOC_INJECT_LIST_NEXT, &zc) == 0)
if ((ret = func((int)zc.zc_guid, zc.zc_name,
&zc.zc_inject_record, data)) != 0)
@ -421,7 +419,7 @@ static int
cancel_one_handler(int id, const char *pool, zinject_record_t *record,
void *data)
{
zfs_cmd_t zc;
zfs_cmd_t zc = { 0 };
zc.zc_guid = (uint64_t)id;
@ -454,7 +452,7 @@ cancel_all_handlers(void)
static int
cancel_handler(int id)
{
zfs_cmd_t zc;
zfs_cmd_t zc = { 0 };
zc.zc_guid = (uint64_t)id;
@ -476,7 +474,7 @@ static int
register_handler(const char *pool, int flags, zinject_record_t *record,
int quiet)
{
zfs_cmd_t zc;
zfs_cmd_t zc = { 0 };
(void) strcpy(zc.zc_name, pool);
zc.zc_inject_record = *record;
@ -533,7 +531,7 @@ register_handler(const char *pool, int flags, zinject_record_t *record,
int
perform_action(const char *pool, zinject_record_t *record, int cmd)
{
zfs_cmd_t zc;
zfs_cmd_t zc = { 0 };
ASSERT(cmd == VDEV_STATE_DEGRADED || cmd == VDEV_STATE_FAULTED);
(void) strlcpy(zc.zc_name, pool, sizeof (zc.zc_name));

View File

@ -153,4 +153,7 @@ zpool_feature_init(void)
zfeature_register(SPA_FEATURE_ASYNC_DESTROY,
"com.delphix:async_destroy", "async_destroy",
"Destroy filesystems asynchronously.", B_TRUE, B_FALSE, NULL);
zfeature_register(SPA_FEATURE_EMPTY_BPOBJ,
"com.delphix:empty_bpobj", "empty_bpobj",
"Snapshots use less space.", B_TRUE, B_FALSE, NULL);
}

View File

@ -51,6 +51,7 @@ typedef int (zfeature_func_t)(zfeature_info_t *fi, void *arg);
/*
 * Identifiers for the known pool features.  SPA_FEATURES is the last
 * enumerator, so its value equals the number of features listed before it.
 */
enum spa_feature {
SPA_FEATURE_ASYNC_DESTROY,
SPA_FEATURE_EMPTY_BPOBJ,
SPA_FEATURES
} spa_feature_t;

View File

@ -3511,7 +3511,7 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
zhp->zfs_type == ZFS_TYPE_VOLUME);
/*
* Destroy all recent snapshots and its dependends.
* Destroy all recent snapshots and their dependents.
*/
cb.cb_force = force;
cb.cb_target = snap->zfs_name;

View File

@ -169,5 +169,33 @@ through the \fBfreeing\fR property.
This feature is only \fBactive\fR while \fBfreeing\fR is non\-zero.
.RE
.sp
.ne 2
.na
\fB\fBempty_bpobj\fR\fR
.ad
.RS 4n
.TS
l l .
GUID com.delphix:empty_bpobj
READ\-ONLY COMPATIBLE yes
DEPENDENCIES none
.TE
This feature increases the performance of creating and using a large
number of snapshots of a single filesystem or volume, and also reduces
the disk space required.
When there are many snapshots, each snapshot uses many Block Pointer
Objects (bpobjs) to track blocks associated with that snapshot.
However, in common use cases, most of these bpobjs are empty.
This feature allows each bpobj to be created on demand, thus eliminating
the empty bpobjs.
This feature is \fBactive\fR while there are any filesystems, volumes,
or snapshots which were created after enabling this feature.
.RE
.SH "SEE ALSO"
\fBzpool\fR(1M)

View File

@ -20,13 +20,61 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/bpobj.h>
#include <sys/zfs_context.h>
#include <sys/refcount.h>
#include <sys/dsl_pool.h>
#include <sys/zfeature.h>
#include <sys/zap.h>
/*
 * Hand back the object number of a bpobj to use as an initially empty
 * bpobj.
 *
 * If the empty_bpobj feature is enabled, the pool-wide dummy object
 * (dp_empty_bpobj) is returned and spa_feature_incr() is called for the
 * feature.  When the feature is not yet active, the dummy is allocated
 * here and recorded in the pool directory under DMU_POOL_EMPTY_BPOBJ.
 *
 * If the feature is not enabled, an ordinary bpobj with the requested
 * blocksize is allocated and returned instead.
 */
uint64_t
bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
{
	zfeature_info_t *feat = &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ];
	spa_t *spa = dmu_objset_spa(os);
	dsl_pool_t *dp = dmu_objset_pool(os);

	/* Feature not enabled: fall back to a regular, private bpobj. */
	if (!spa_feature_is_enabled(spa, feat))
		return (bpobj_alloc(os, blocksize, tx));

	if (!spa_feature_is_active(spa, feat)) {
		/* No dummy exists yet: create it and remember where it is. */
		ASSERT3U(dp->dp_empty_bpobj, ==, 0);
		dp->dp_empty_bpobj = bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx);
		VERIFY(zap_add(os, DMU_POOL_DIRECTORY_OBJECT,
		    DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
		    &dp->dp_empty_bpobj, tx) == 0);
	}

	spa_feature_incr(spa, feat, tx);
	ASSERT(dp->dp_empty_bpobj != 0);
	return (dp->dp_empty_bpobj);
}
/*
 * Give up one use of the shared empty bpobj by calling spa_feature_decr()
 * for the empty_bpobj feature.  If the feature is no longer active
 * afterwards, the dummy object is removed from the pool directory, freed,
 * and dp_empty_bpobj is reset to 0.
 */
void
bpobj_decr_empty(objset_t *os, dmu_tx_t *tx)
{
	zfeature_info_t *feat = &spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ];
	dsl_pool_t *dp = dmu_objset_pool(os);
	spa_t *spa = dmu_objset_spa(os);

	spa_feature_decr(spa, feat, tx);

	/* Other users remain: keep the dummy object around. */
	if (spa_feature_is_active(spa, feat))
		return;

	VERIFY3U(0, ==, zap_remove(dp->dp_meta_objset,
	    DMU_POOL_DIRECTORY_OBJECT, DMU_POOL_EMPTY_BPOBJ, tx));
	VERIFY3U(0, ==, dmu_object_free(os, dp->dp_empty_bpobj, tx));
	dp->dp_empty_bpobj = 0;
}
uint64_t
bpobj_alloc(objset_t *os, int blocksize, dmu_tx_t *tx)
@ -53,6 +101,7 @@ bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx)
int epb;
dmu_buf_t *dbuf = NULL;
ASSERT(obj != dmu_objset_pool(os)->dp_empty_bpobj);
VERIFY3U(0, ==, bpobj_open(&bpo, os, obj));
mutex_enter(&bpo.bpo_lock);
@ -320,6 +369,12 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
ASSERT(bpo->bpo_havesubobj);
ASSERT(bpo->bpo_havecomp);
ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) {
bpobj_decr_empty(bpo->bpo_os, tx);
return;
}
VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
@ -388,6 +443,7 @@ bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, dmu_tx_t *tx)
blkptr_t *bparray;
ASSERT(!BP_IS_HOLE(bp));
ASSERT(bpo->bpo_object != dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj);
/* We never need the fill count. */
stored_bp.blk_fill = 0;

View File

@ -1236,15 +1236,6 @@ dmu_objset_is_dirty(objset_t *os, uint64_t txg)
!list_is_empty(&os->os_free_dnodes[txg & TXG_MASK]));
}
/*
 * Return B_TRUE if dmu_objset_is_dirty() reports the objset dirty for any
 * of the TXG_SIZE txg slots, B_FALSE otherwise.
 */
boolean_t
dmu_objset_is_dirty_anywhere(objset_t *os)
{
	int slot;

	for (slot = 0; slot < TXG_SIZE; slot++) {
		if (dmu_objset_is_dirty(os, slot))
			return (B_TRUE);
	}

	return (B_FALSE);
}
static objset_used_cb_t *used_cbs[DMU_OST_NUMTYPES];
void

View File

@ -1210,6 +1210,17 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
}
}
/*
 * Return B_TRUE if the dataset is a member of its pool's
 * dp_dirty_datasets txg list for any of the TXG_SIZE slots, B_FALSE
 * otherwise.
 */
boolean_t
dsl_dataset_is_dirty(dsl_dataset_t *ds)
{
	int slot;

	for (slot = 0; slot < TXG_SIZE; slot++) {
		if (txg_list_member(
		    &ds->ds_dir->dd_pool->dp_dirty_datasets, ds, slot)) {
			return (B_TRUE);
		}
	}

	return (B_FALSE);
}
/*
* The unique space in the head dataset can be calculated by subtracting
* the space used in the most recent snapshot, that is still being used
@ -3432,9 +3443,6 @@ dsl_dataset_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
if (ds->ds_quota != effective_value) {
dmu_buf_will_dirty(ds->ds_dbuf, tx);
ds->ds_quota = effective_value;
spa_history_log_internal_ds(ds, "set refquota", tx,
"refquota=%lld", (longlong_t)ds->ds_quota);
}
}
@ -3538,9 +3546,6 @@ dsl_dataset_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
dsl_dir_diduse_space(ds->ds_dir, DD_USED_REFRSRV, delta, 0, 0, tx);
mutex_exit(&ds->ds_dir->dd_lock);
spa_history_log_internal_ds(ds, "set refreservation", tx,
"refreservation=%lld", (longlong_t)effective_value);
}
int

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011 by Delphix. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#include <sys/dsl_dataset.h>
@ -163,12 +163,49 @@ dsl_deadlist_free(objset_t *os, uint64_t dlobj, dmu_tx_t *tx)
for (zap_cursor_init(&zc, os, dlobj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc))
bpobj_free(os, za.za_first_integer, tx);
zap_cursor_advance(&zc)) {
uint64_t obj = za.za_first_integer;
if (obj == dmu_objset_pool(os)->dp_empty_bpobj)
bpobj_decr_empty(os, tx);
else
bpobj_free(os, obj, tx);
}
zap_cursor_fini(&zc);
VERIFY3U(0, ==, dmu_object_free(os, dlobj, tx));
}
/*
 * Append bp to the bpobj of a deadlist entry.
 *
 * If the entry currently refers to the pool's shared empty bpobj, it is
 * first switched over to a newly allocated bpobj: the shared one is
 * closed and its use dropped, the new one is opened in its place, and the
 * deadlist ZAP entry keyed by dle_mintxg is updated to the new object.
 */
static void
dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
    const blkptr_t *bp, dmu_tx_t *tx)
{
	objset_t *os = dl->dl_os;

	if (dle->dle_bpobj.bpo_object == dmu_objset_pool(os)->dp_empty_bpobj) {
		uint64_t newobj = bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx);

		bpobj_close(&dle->dle_bpobj);
		bpobj_decr_empty(os, tx);
		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, os, newobj));
		VERIFY3U(0, ==, zap_update_int_key(os, dl->dl_object,
		    dle->dle_mintxg, newobj, tx));
	}

	bpobj_enqueue(&dle->dle_bpobj, bp, tx);
}
/*
 * Attach the bpobj numbered obj to a deadlist entry.
 *
 * If the entry currently refers to the pool's shared empty bpobj, the
 * entry adopts obj as its own bpobj (dropping its use of the shared one
 * and updating the deadlist ZAP entry keyed by dle_mintxg).  Otherwise
 * obj is enqueued as a sub-object of the entry's existing bpobj.
 */
static void
dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
    uint64_t obj, dmu_tx_t *tx)
{
	objset_t *os = dl->dl_os;

	if (dle->dle_bpobj.bpo_object == dmu_objset_pool(os)->dp_empty_bpobj) {
		/* Replace the shared placeholder with obj itself. */
		bpobj_close(&dle->dle_bpobj);
		bpobj_decr_empty(os, tx);
		VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, os, obj));
		VERIFY3U(0, ==, zap_update_int_key(os, dl->dl_object,
		    dle->dle_mintxg, obj, tx));
		return;
	}

	bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
}
void
dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
{
@ -197,7 +234,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
else
dle = AVL_PREV(&dl->dl_tree, dle);
bpobj_enqueue(&dle->dle_bpobj, bp, tx);
dle_enqueue(dl, dle, bp, tx);
}
/*
@ -217,7 +254,7 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = mintxg;
obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
obj = bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);
@ -243,8 +280,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
dle = avl_find(&dl->dl_tree, &dle_tofind, NULL);
dle_prev = AVL_PREV(&dl->dl_tree, dle);
bpobj_enqueue_subobj(&dle_prev->dle_bpobj,
dle->dle_bpobj.bpo_object, tx);
dle_enqueue_subobj(dl, dle_prev, dle->dle_bpobj.bpo_object, tx);
avl_remove(&dl->dl_tree, dle);
bpobj_close(&dle->dle_bpobj);
@ -302,7 +338,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
if (dle->dle_mintxg >= maxtxg)
break;
obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
obj = bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
@ -400,7 +436,7 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
if (dle == NULL)
dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
dle_enqueue_subobj(dl, dle, obj, tx);
}
static int

View File

@ -1050,9 +1050,6 @@ dsl_dir_set_quota_sync(void *arg1, void *arg2, dmu_tx_t *tx)
mutex_enter(&dd->dd_lock);
dd->dd_phys->dd_quota = effective_value;
mutex_exit(&dd->dd_lock);
spa_history_log_internal_dd(dd, "set quota", tx,
"quota=%lld", (longlong_t)effective_value);
}
int
@ -1172,9 +1169,6 @@ dsl_dir_set_reservation_sync(void *arg1, void *arg2, dmu_tx_t *tx)
DSL_PROP_CHECK_PREDICTION(dd, psa);
dsl_dir_set_reservation_sync_impl(dd, value, tx);
spa_history_log_internal_dd(dd, "set reservation", tx,
"reservation=%lld", (longlong_t)value);
}
int

View File

@ -126,8 +126,6 @@ dsl_pool_open(dsl_pool_t *dp)
dsl_dataset_t *ds;
uint64_t obj;
ASSERT(!dmu_objset_is_dirty_anywhere(dp->dp_meta_objset));
rw_enter(&dp->dp_config_rwlock, RW_WRITER);
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_ROOT_DATASET, sizeof (uint64_t), 1,
@ -184,6 +182,15 @@ dsl_pool_open(dsl_pool_t *dp)
goto out;
}
if (spa_feature_is_active(dp->dp_spa,
&spa_feature_table[SPA_FEATURE_EMPTY_BPOBJ])) {
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
&dp->dp_empty_bpobj);
if (err != 0)
goto out;
}
err = zap_lookup(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
DMU_POOL_TMP_USERREFS, sizeof (uint64_t), 1,
&dp->dp_tmp_userrefs_obj);

View File

@ -714,7 +714,7 @@ spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
vdev_config_dirty(rvd);
spa_config_exit(spa, SCL_STATE, FTAG);
spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld",
spa_history_log_internal(spa, "guid change", tx, "old=%llu new=%llu",
oldguid, *newguid);
}

View File

@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
*/
#ifndef _SYS_BPOBJ_H
@ -67,7 +68,9 @@ typedef struct bpobj {
typedef int bpobj_itor_t(void *arg, const blkptr_t *bp, dmu_tx_t *tx);
uint64_t bpobj_alloc(objset_t *mos, int blocksize, dmu_tx_t *tx);
uint64_t bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx);
void bpobj_free(objset_t *os, uint64_t obj, dmu_tx_t *tx);
void bpobj_decr_empty(objset_t *os, dmu_tx_t *tx);
int bpobj_open(bpobj_t *bpo, objset_t *mos, uint64_t object);
void bpobj_close(bpobj_t *bpo);

View File

@ -305,6 +305,7 @@ typedef void dmu_buf_evict_func_t(struct dmu_buf *db, void *user_ptr);
#define DMU_POOL_SCAN "scan"
#define DMU_POOL_FREE_BPOBJ "free_bpobj"
#define DMU_POOL_BPTREE_OBJ "bptree_obj"
#define DMU_POOL_EMPTY_BPOBJ "empty_bpobj"
/*
* Allocate an object from this objset. The range of object numbers

View File

@ -152,7 +152,6 @@ timestruc_t dmu_objset_snap_cmtime(objset_t *os);
/* called from dsl */
void dmu_objset_sync(objset_t *os, zio_t *zio, dmu_tx_t *tx);
boolean_t dmu_objset_is_dirty(objset_t *os, uint64_t txg);
boolean_t dmu_objset_is_dirty_anywhere(objset_t *os);
objset_t *dmu_objset_create_impl(spa_t *spa, struct dsl_dataset *ds,
blkptr_t *bp, dmu_objset_type_t type, dmu_tx_t *tx);
int dmu_objset_open_impl(spa_t *spa, struct dsl_dataset *ds, blkptr_t *bp,

View File

@ -262,6 +262,7 @@ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds);
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);

View File

@ -88,6 +88,7 @@ typedef struct dsl_pool {
uint64_t dp_tmp_userrefs_obj;
bpobj_t dp_free_bpobj;
uint64_t dp_bptree_obj;
uint64_t dp_empty_bpobj;
struct dsl_scan *dp_scan;

View File

@ -300,6 +300,8 @@ int zap_increment_int(objset_t *os, uint64_t obj, uint64_t key, int64_t delta,
/* Here the key is an int and the value is a different int. */
int zap_add_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t value, dmu_tx_t *tx);
int zap_update_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t value, dmu_tx_t *tx);
int zap_lookup_int_key(objset_t *os, uint64_t obj,
uint64_t key, uint64_t *valuep);

View File

@ -1093,6 +1093,16 @@ zap_add_int_key(objset_t *os, uint64_t obj,
return (zap_add(os, obj, name, 8, 1, &value, tx));
}
/*
 * Store an integer value under an integer key via zap_update().  The key
 * is formatted as a hexadecimal string to form the entry name; the value
 * is passed as a single 8-byte integer.  Returns whatever zap_update()
 * returns.
 */
int
zap_update_int_key(objset_t *os, uint64_t obj,
    uint64_t key, uint64_t value, dmu_tx_t *tx)
{
	char keystr[20];

	(void) snprintf(keystr, sizeof (keystr), "%llx", (longlong_t)key);

	return (zap_update(os, obj, keystr, sizeof (uint64_t), 1, &value, tx));
}
int
zap_lookup_int_key(objset_t *os, uint64_t obj, uint64_t key, uint64_t *valuep)
{

View File

@ -221,7 +221,12 @@ feature_get_refcount(objset_t *os, uint64_t read_obj, uint64_t write_obj,
uint64_t refcount;
uint64_t zapobj = feature->fi_can_readonly ? write_obj : read_obj;
ASSERT(0 != zapobj);
/*
* If the pool is currently being created, the feature objects may not
* have been allocated yet. Act as though all features are disabled.
*/
if (zapobj == 0)
return (ENOTSUP);
err = zap_lookup(os, zapobj, feature->fi_guid, sizeof (uint64_t), 1,
&refcount);

View File

@ -1831,9 +1831,9 @@ zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
/*
* Evict cached data
*/
if (dmu_objset_is_dirty_anywhere(zfsvfs->z_os))
if (!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
if (dsl_dataset_is_dirty(dmu_objset_ds(zfsvfs->z_os)) &&
!(zfsvfs->z_vfs->vfs_flag & VFS_RDONLY))
txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
(void) dmu_objset_evict_dbufs(zfsvfs->z_os);
return (0);

View File

@ -641,8 +641,18 @@ zvol_last_close(zvol_state_t *zv)
{
zil_close(zv->zv_zilog);
zv->zv_zilog = NULL;
dmu_buf_rele(zv->zv_dbuf, zvol_tag);
zv->zv_dbuf = NULL;
/*
* Evict cached data
*/
if (dsl_dataset_is_dirty(dmu_objset_ds(zv->zv_objset)) &&
!(zv->zv_flags & ZVOL_RDONLY))
txg_wait_synced(dmu_objset_pool(zv->zv_objset), 0);
(void) dmu_objset_evict_dbufs(zv->zv_objset);
dmu_objset_disown(zv->zv_objset, zvol_tag);
zv->zv_objset = NULL;
}