From 1034179220ed4258352b23b759ef0c4849303c41 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Tue, 4 Sep 2012 21:58:22 +0000 Subject: [PATCH] Update vendor/illumos/dist and vendor/illumos-gate/dist to illumos-gate revision 13794:7c5e0e746b2c Obtained from: ssh://anonhg@hg.illumos.org/illumos-gate --- cmd/ztest/ztest.c | 2 + lib/libzpool/common/sys/zfs_context.h | 1 + uts/common/fs/zfs/arc.c | 74 +++++++++++++++++--- uts/common/fs/zfs/bptree.c | 3 +- uts/common/fs/zfs/dmu_traverse.c | 99 +++++++++++++++++++++++---- uts/common/fs/zfs/dmu_tx.c | 42 ++++++++---- uts/common/fs/zfs/dsl_dataset.c | 1 - uts/common/fs/zfs/dsl_dir.c | 12 ++-- uts/common/fs/zfs/dsl_synctask.c | 7 +- uts/common/fs/zfs/spa_history.c | 5 +- uts/common/fs/zfs/spa_misc.c | 12 ++++ uts/common/fs/zfs/sys/arc.h | 6 ++ uts/common/fs/zfs/sys/dnode.h | 2 +- uts/common/fs/zfs/sys/zfs_debug.h | 5 ++ uts/common/fs/zfs/zfs_ioctl.c | 10 +++ uts/common/fs/zfs/zio.c | 14 +++- 16 files changed, 238 insertions(+), 57 deletions(-) diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index bb6398cbda8c..caa783458400 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -5835,6 +5835,8 @@ main(int argc, char **argv) (void) setvbuf(stdout, NULL, _IOLBF, 0); + dprintf_setup(&argc, argv); + if (!ischild) { process_options(argc, argv); diff --git a/lib/libzpool/common/sys/zfs_context.h b/lib/libzpool/common/sys/zfs_context.h index 39af927f7105..8ca60b762552 100644 --- a/lib/libzpool/common/sys/zfs_context.h +++ b/lib/libzpool/common/sys/zfs_context.h @@ -61,6 +61,7 @@ extern "C" { #include #include #include +#include #include #include #include diff --git a/uts/common/fs/zfs/arc.c b/uts/common/fs/zfs/arc.c index bd6bda5c9bfc..630a4477ad99 100644 --- a/uts/common/fs/zfs/arc.c +++ b/uts/common/fs/zfs/arc.c @@ -135,6 +135,12 @@ #include #include +#ifndef _KERNEL +/* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */ +boolean_t arc_watch = B_FALSE; +int arc_procfd; +#endif + static kmutex_t arc_reclaim_thr_lock; static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */ static uint8_t arc_thread_exit; @@ -474,6 +480,7 @@ static void arc_get_data_buf(arc_buf_t *buf); static void arc_access(arc_buf_hdr_t *buf, kmutex_t *hash_lock); static int arc_evict_needed(arc_buf_contents_t type); static void arc_evict_ghost(arc_state_t *state, uint64_t spa, int64_t bytes); +static void arc_buf_watch(arc_buf_t *buf); static boolean_t l2arc_write_eligible(uint64_t spa_guid, arc_buf_hdr_t *ab); @@ -949,6 +956,50 @@ arc_cksum_compute(arc_buf_t *buf, boolean_t force) fletcher_2_native(buf->b_data, buf->b_hdr->b_size, buf->b_hdr->b_freeze_cksum); mutex_exit(&buf->b_hdr->b_freeze_lock); + arc_buf_watch(buf); +} + +#ifndef _KERNEL +typedef struct procctl { + long cmd; + prwatch_t prwatch; +} procctl_t; +#endif + +/* ARGSUSED */ +static void +arc_buf_unwatch(arc_buf_t *buf) +{ +#ifndef _KERNEL + if (arc_watch) { + int result; + procctl_t ctl; + ctl.cmd = PCWATCH; + ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data; + ctl.prwatch.pr_size = 0; + ctl.prwatch.pr_wflags = 0; + result = write(arc_procfd, &ctl, sizeof (ctl)); + ASSERT3U(result, ==, sizeof (ctl)); + } +#endif +} + +/* ARGSUSED */ +static void +arc_buf_watch(arc_buf_t *buf) +{ +#ifndef _KERNEL + if (arc_watch) { + int result; + procctl_t ctl; + ctl.cmd = PCWATCH; + ctl.prwatch.pr_vaddr = (uintptr_t)buf->b_data; + ctl.prwatch.pr_size = buf->b_hdr->b_size; + ctl.prwatch.pr_wflags = WA_WRITE; + result = write(arc_procfd, &ctl, sizeof (ctl)); + ASSERT3U(result, ==, sizeof (ctl)); + } +#endif } void @@ -975,6 +1026,8 @@ arc_buf_thaw(arc_buf_t *buf) } mutex_exit(&buf->b_hdr->b_freeze_lock); + + arc_buf_unwatch(buf); } void @@ -992,6 +1045,7 @@ arc_buf_freeze(arc_buf_t *buf) buf->b_hdr->b_state == arc_anon); arc_cksum_compute(buf, B_FALSE); mutex_exit(hash_lock); + } static void @@ -1348,21 +1402,22 @@ arc_buf_add_ref(arc_buf_t *buf, void* tag) * the buffer is placed on l2arc_free_on_write to be freed later. */ static void -arc_buf_data_free(arc_buf_hdr_t *hdr, void (*free_func)(void *, size_t), - void *data, size_t size) +arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t)) { + arc_buf_hdr_t *hdr = buf->b_hdr; + if (HDR_L2_WRITING(hdr)) { l2arc_data_free_t *df; df = kmem_alloc(sizeof (l2arc_data_free_t), KM_SLEEP); - df->l2df_data = data; - df->l2df_size = size; + df->l2df_data = buf->b_data; + df->l2df_size = hdr->b_size; df->l2df_func = free_func; mutex_enter(&l2arc_free_on_write_mtx); list_insert_head(l2arc_free_on_write, df); mutex_exit(&l2arc_free_on_write_mtx); ARCSTAT_BUMP(arcstat_l2_free_on_write); } else { - free_func(data, size); + free_func(buf->b_data, hdr->b_size); } } @@ -1378,16 +1433,15 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) arc_buf_contents_t type = buf->b_hdr->b_type; arc_cksum_verify(buf); + arc_buf_unwatch(buf); if (!recycle) { if (type == ARC_BUFC_METADATA) { - arc_buf_data_free(buf->b_hdr, zio_buf_free, - buf->b_data, size); + arc_buf_data_free(buf, zio_buf_free); arc_space_return(size, ARC_SPACE_DATA); } else { ASSERT(type == ARC_BUFC_DATA); - arc_buf_data_free(buf->b_hdr, - zio_data_buf_free, buf->b_data, size); + arc_buf_data_free(buf, zio_data_buf_free); ARCSTAT_INCR(arcstat_data_size, -size); atomic_add_64(&arc_size, -size); } @@ -2556,6 +2610,7 @@ arc_read_done(zio_t *zio) } arc_cksum_compute(buf, B_FALSE); + arc_buf_watch(buf); if (hash_lock && zio->io_error == 0 && hdr->b_state == arc_anon) { /* @@ -3113,6 +3168,7 @@ arc_release(arc_buf_t *buf, void *tag) } hdr->b_datacnt -= 1; arc_cksum_verify(buf); + arc_buf_unwatch(buf); mutex_exit(hash_lock); diff --git a/uts/common/fs/zfs/bptree.c b/uts/common/fs/zfs/bptree.c index 8c5a7d40ef37..ca4cd254689c 100644 --- a/uts/common/fs/zfs/bptree.c +++ b/uts/common/fs/zfs/bptree.c @@ -189,7 +189,8 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func, break; err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp, - bte.be_birth_txg, &bte.be_zb, TRAVERSE_POST, + bte.be_birth_txg, &bte.be_zb, + TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST, bptree_visit_cb, &ba); if (free) { ASSERT(err == 0 || err == ERESTART); diff --git a/uts/common/fs/zfs/dmu_traverse.c b/uts/common/fs/zfs/dmu_traverse.c index bfe9e6506426..a01c11bbab1d 100644 --- a/uts/common/fs/zfs/dmu_traverse.c +++ b/uts/common/fs/zfs/dmu_traverse.c @@ -63,6 +63,8 @@ typedef struct traverse_data { static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, arc_buf_t *buf, uint64_t objset, uint64_t object); +static void prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *, + arc_buf_t *buf, uint64_t objset, uint64_t object); static int traverse_zil_block(zilog_t *zilog, blkptr_t *bp, void *arg, uint64_t claim_txg) @@ -178,9 +180,34 @@ traverse_pause(traverse_data_t *td, const zbookmark_t *zb) bcopy(zb, td->td_resume, sizeof (*td->td_resume)); } +static void +traverse_prefetch_metadata(traverse_data_t *td, + arc_buf_t *pbuf, const blkptr_t *bp, const zbookmark_t *zb) +{ + uint32_t flags = ARC_NOWAIT | ARC_PREFETCH; + + if (!(td->td_flags & TRAVERSE_PREFETCH_METADATA)) + return; + /* + * If we are in the process of resuming, don't prefetch, because + * some children will not be needed (and in fact may have already + * been freed). + */ + if (td->td_resume != NULL && !ZB_IS_ZERO(td->td_resume)) + return; + if (BP_IS_HOLE(bp) || bp->blk_birth <= td->td_min_txg) + return; + if (BP_GET_LEVEL(bp) == 0 && BP_GET_TYPE(bp) != DMU_OT_DNODE) + return; + + (void) arc_read(NULL, td->td_spa, bp, + pbuf, NULL, NULL, ZIO_PRIORITY_ASYNC_READ, + ZIO_FLAG_CANFAIL, &flags, zb); +} + static int traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, - arc_buf_t *pbuf, blkptr_t *bp, const zbookmark_t *zb) + arc_buf_t *pbuf, const blkptr_t *bp, const zbookmark_t *zb) { zbookmark_t czb; int err = 0, lasterr = 0; @@ -243,14 +270,21 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err) return (err); - - /* recursively visitbp() blocks below this */ cbp = buf->b_data; - for (i = 0; i < epb; i++, cbp++) { + + for (i = 0; i < epb; i++) { SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, zb->zb_level - 1, zb->zb_blkid * epb + i); - err = traverse_visitbp(td, dnp, buf, cbp, &czb); + traverse_prefetch_metadata(td, buf, &cbp[i], &czb); + } + + /* recursively visitbp() blocks below this */ + for (i = 0; i < epb; i++) { + SET_BOOKMARK(&czb, zb->zb_objset, zb->zb_object, + zb->zb_level - 1, + zb->zb_blkid * epb + i); + err = traverse_visitbp(td, dnp, buf, &cbp[i], &czb); if (err) { if (!hard) break; @@ -267,11 +301,16 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, ZIO_PRIORITY_ASYNC_READ, ZIO_FLAG_CANFAIL, &flags, zb); if (err) return (err); + dnp = buf->b_data; + + for (i = 0; i < epb; i++) { + prefetch_dnode_metadata(td, &dnp[i], buf, zb->zb_objset, + zb->zb_blkid * epb + i); + } /* recursively visitbp() blocks below this */ - dnp = buf->b_data; - for (i = 0; i < epb; i++, dnp++) { - err = traverse_dnode(td, dnp, buf, zb->zb_objset, + for (i = 0; i < epb; i++) { + err = traverse_dnode(td, &dnp[i], buf, zb->zb_objset, zb->zb_blkid * epb + i); if (err) { if (!hard) @@ -292,6 +331,15 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp, osp = buf->b_data; dnp = &osp->os_meta_dnode; + prefetch_dnode_metadata(td, dnp, buf, zb->zb_objset, + DMU_META_DNODE_OBJECT); + if (arc_buf_size(buf) >= sizeof (objset_phys_t)) { + prefetch_dnode_metadata(td, &osp->os_userused_dnode, + buf, zb->zb_objset, DMU_USERUSED_OBJECT); + prefetch_dnode_metadata(td, &osp->os_groupused_dnode, + buf, zb->zb_objset, DMU_USERUSED_OBJECT); + } + err = traverse_dnode(td, dnp, buf, zb->zb_objset, DMU_META_DNODE_OBJECT); if (err && hard) { @@ -334,6 +382,24 @@ post: return (err != 0 ? err : lasterr); } +static void +prefetch_dnode_metadata(traverse_data_t *td, const dnode_phys_t *dnp, + arc_buf_t *buf, uint64_t objset, uint64_t object) +{ + int j; + zbookmark_t czb; + + for (j = 0; j < dnp->dn_nblkptr; j++) { + SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); + traverse_prefetch_metadata(td, buf, &dnp->dn_blkptr[j], &czb); + } + + if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { + SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID); + traverse_prefetch_metadata(td, buf, &dnp->dn_spill, &czb); + } +} + static int traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, arc_buf_t *buf, uint64_t objset, uint64_t object) @@ -344,8 +410,7 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, for (j = 0; j < dnp->dn_nblkptr; j++) { SET_BOOKMARK(&czb, objset, object, dnp->dn_nlevels - 1, j); - err = traverse_visitbp(td, dnp, buf, - (blkptr_t *)&dnp->dn_blkptr[j], &czb); + err = traverse_visitbp(td, dnp, buf, &dnp->dn_blkptr[j], &czb); if (err) { if (!hard) break; @@ -354,10 +419,8 @@ traverse_dnode(traverse_data_t *td, const dnode_phys_t *dnp, } if (dnp->dn_flags & DNODE_FLAG_SPILL_BLKPTR) { - SET_BOOKMARK(&czb, objset, - object, 0, DMU_SPILL_BLKID); - err = traverse_visitbp(td, dnp, buf, - (blkptr_t *)&dnp->dn_spill, &czb); + SET_BOOKMARK(&czb, objset, object, 0, DMU_SPILL_BLKID); + err = traverse_visitbp(td, dnp, buf, &dnp->dn_spill, &czb); if (err) { if (!hard) return (err); @@ -438,6 +501,12 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, ASSERT(ds == NULL || objset == ds->ds_object); ASSERT(!(flags & TRAVERSE_PRE) || !(flags & TRAVERSE_POST)); + /* + * The data prefetching mechanism (the prefetch thread) is incompatible + * with resuming from a bookmark. + */ + ASSERT(resume == NULL || !(flags & TRAVERSE_PREFETCH_DATA)); + td.td_spa = spa; td.td_objset = objset; td.td_rootbp = rootbp; @@ -464,7 +533,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp, traverse_zil(&td, &os->os_zil_header); } - if (!(flags & TRAVERSE_PREFETCH) || + if (!(flags & TRAVERSE_PREFETCH_DATA) || 0 == taskq_dispatch(system_taskq, traverse_prefetch_thread, &td, TQ_NOQUEUE)) pd.pd_exited = B_TRUE; diff --git a/uts/common/fs/zfs/dmu_tx.c b/uts/common/fs/zfs/dmu_tx.c index 723d62b48542..5e3971ad451e 100644 --- a/uts/common/fs/zfs/dmu_tx.c +++ b/uts/common/fs/zfs/dmu_tx.c @@ -429,6 +429,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) dsl_dataset_t *ds = dn->dn_objset->os_dsl_dataset; spa_t *spa = txh->txh_tx->tx_pool->dp_spa; int epbs; + uint64_t l0span = 0, nl1blks = 0; if (dn->dn_nlevels == 0) return; @@ -461,6 +462,7 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) nblks = dn->dn_maxblkid - blkid; } + l0span = nblks; /* save for later use to calc level > 1 overhead */ if (dn->dn_nlevels == 1) { int i; for (i = 0; i < nblks; i++) { @@ -473,24 +475,10 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) } unref += BP_GET_ASIZE(bp); } + nl1blks = 1; nblks = 0; } - /* - * Add in memory requirements of higher-level indirects. - * This assumes a worst-possible scenario for dn_nlevels. - */ - { - uint64_t blkcnt = 1 + ((nblks >> epbs) >> epbs); - int level = (dn->dn_nlevels > 1) ? 2 : 1; - - while (level++ < DN_MAX_LEVELS) { - txh->txh_memory_tohold += blkcnt << dn->dn_indblkshift; - blkcnt = 1 + (blkcnt >> epbs); - } - ASSERT(blkcnt <= dn->dn_nblkptr); - } - lastblk = blkid + nblks - 1; while (nblks) { dmu_buf_impl_t *dbuf; @@ -561,11 +549,35 @@ dmu_tx_count_free(dmu_tx_hold_t *txh, uint64_t off, uint64_t len) } dbuf_rele(dbuf, FTAG); + ++nl1blks; blkid += tochk; nblks -= tochk; } rw_exit(&dn->dn_struct_rwlock); + /* + * Add in memory requirements of higher-level indirects. + * This assumes a worst-possible scenario for dn_nlevels and a + * worst-possible distribution of l1-blocks over the region to free. + */ + { + uint64_t blkcnt = 1 + ((l0span >> epbs) >> epbs); + int level = 2; + /* + * Here we don't use DN_MAX_LEVEL, but calculate it with the + * given datablkshift and indblkshift. This makes the + * difference between 19 and 8 on large files. + */ + int maxlevel = 2 + (DN_MAX_OFFSET_SHIFT - dn->dn_datablkshift) / + (dn->dn_indblkshift - SPA_BLKPTRSHIFT); + + while (level++ < maxlevel) { + txh->txh_memory_tohold += MIN(blkcnt, (nl1blks >> epbs)) + << dn->dn_indblkshift; + blkcnt = 1 + (blkcnt >> epbs); + } + } + /* account for new level 1 indirect blocks that might show up */ if (skipped > 0) { txh->txh_fudge += skipped << dn->dn_indblkshift; diff --git a/uts/common/fs/zfs/dsl_dataset.c b/uts/common/fs/zfs/dsl_dataset.c index 0a2e7191c8a6..4571fa8743a3 100644 --- a/uts/common/fs/zfs/dsl_dataset.c +++ b/uts/common/fs/zfs/dsl_dataset.c @@ -2302,7 +2302,6 @@ dsl_dataset_stats(dsl_dataset_t *ds, nvlist_t *nv) } } } - } void diff --git a/uts/common/fs/zfs/dsl_dir.c b/uts/common/fs/zfs/dsl_dir.c index 365612879375..e58260cdf484 100644 --- a/uts/common/fs/zfs/dsl_dir.c +++ b/uts/common/fs/zfs/dsl_dir.c @@ -456,12 +456,14 @@ dsl_dir_destroy_check(void *arg1, void *arg2, dmu_tx_t *tx) /* * There should be exactly two holds, both from * dsl_dataset_destroy: one on the dd directory, and one on its - * head ds. Otherwise, someone is trying to lookup something - * inside this dir while we want to destroy it. The - * config_rwlock ensures that nobody else opens it after we - * check. + * head ds. If there are more holds, then a concurrent thread is + * performing a lookup inside this dir while we're trying to destroy + * it. To minimize this possibility, we perform this check only + * in syncing context and fail the operation if we encounter + * additional holds. The dp_config_rwlock ensures that nobody else + * opens it after we check. */ - if (dmu_buf_refcount(dd->dd_dbuf) > 2) + if (dmu_tx_is_syncing(tx) && dmu_buf_refcount(dd->dd_dbuf) > 2) return (EBUSY); err = zap_count(mos, dd->dd_phys->dd_child_dir_zapobj, &count); diff --git a/uts/common/fs/zfs/dsl_synctask.c b/uts/common/fs/zfs/dsl_synctask.c index 312423e943b5..4e56d28d8237 100644 --- a/uts/common/fs/zfs/dsl_synctask.c +++ b/uts/common/fs/zfs/dsl_synctask.c @@ -230,12 +230,7 @@ dsl_sync_task_do_nowait(dsl_pool_t *dp, dsl_checkfunc_t *checkfunc, dsl_syncfunc_t *syncfunc, void *arg1, void *arg2, int blocks_modified, dmu_tx_t *tx) { - dsl_sync_task_group_t *dstg; - - if (!spa_writeable(dp->dp_spa)) - return; - - dstg = dsl_sync_task_group_create(dp); + dsl_sync_task_group_t *dstg = dsl_sync_task_group_create(dp); dsl_sync_task_create(dstg, checkfunc, syncfunc, arg1, arg2, blocks_modified); dsl_sync_task_group_nowait(dstg, tx); diff --git a/uts/common/fs/zfs/spa_history.c b/uts/common/fs/zfs/spa_history.c index 96970dafba69..9ae28739f377 100644 --- a/uts/common/fs/zfs/spa_history.c +++ b/uts/common/fs/zfs/spa_history.c @@ -303,7 +303,7 @@ spa_history_log_nvl(spa_t *spa, nvlist_t *nvl) dmu_tx_t *tx; nvlist_t *nvarg; - if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) + if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY || !spa_writeable(spa)) return (EINVAL); tx = dmu_tx_create_dd(spa_get_dsl(spa)->dp_mos_dir); @@ -439,8 +439,9 @@ log_internal(nvlist_t *nvl, const char *operation, spa_t *spa, /* * If this is part of creating a pool, not everything is * initialized yet, so don't bother logging the internal events. + * Likewise if the pool is not writeable. */ - if (tx->tx_txg == TXG_INITIAL) { + if (tx->tx_txg == TXG_INITIAL || !spa_writeable(spa)) { fnvlist_free(nvl); return; } diff --git a/uts/common/fs/zfs/spa_misc.c b/uts/common/fs/zfs/spa_misc.c index 9f50e38652ce..30681b6464b9 100644 --- a/uts/common/fs/zfs/spa_misc.c +++ b/uts/common/fs/zfs/spa_misc.c @@ -1600,6 +1600,18 @@ spa_init(int mode) spa_mode_global = mode; +#ifndef _KERNEL + if (spa_mode_global != FREAD && dprintf_find_string("watch")) { + arc_procfd = open("/proc/self/ctl", O_WRONLY); + if (arc_procfd == -1) { + perror("could not enable watchpoints: " + "opening /proc/self/ctl failed: "); + } else { + arc_watch = B_TRUE; + } + } +#endif + refcount_init(); unique_init(); zio_init(); diff --git a/uts/common/fs/zfs/sys/arc.h b/uts/common/fs/zfs/sys/arc.h index 8f189c62d31d..28dbc57275cb 100644 --- a/uts/common/fs/zfs/sys/arc.h +++ b/uts/common/fs/zfs/sys/arc.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ARC_H @@ -135,6 +136,11 @@ void l2arc_fini(void); void l2arc_start(void); void l2arc_stop(void); +#ifndef _KERNEL +extern boolean_t arc_watch; +extern int arc_procfd; +#endif + #ifdef __cplusplus } #endif diff --git a/uts/common/fs/zfs/sys/dnode.h b/uts/common/fs/zfs/sys/dnode.h index 9ad4be36bf85..9f9134d8cdbe 100644 --- a/uts/common/fs/zfs/sys/dnode.h +++ b/uts/common/fs/zfs/sys/dnode.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_DNODE_H @@ -276,7 +277,6 @@ void dnode_byteswap(dnode_phys_t *dnp); void dnode_buf_byteswap(void *buf, size_t size); void dnode_verify(dnode_t *dn); int dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx); -uint64_t dnode_current_max_length(dnode_t *dn); void dnode_free_range(dnode_t *dn, uint64_t off, uint64_t len, dmu_tx_t *tx); void dnode_clear_range(dnode_t *dn, uint64_t blkid, uint64_t nblks, dmu_tx_t *tx); diff --git a/uts/common/fs/zfs/sys/zfs_debug.h b/uts/common/fs/zfs/sys/zfs_debug.h index 50ecf9b36249..94626229add7 100644 --- a/uts/common/fs/zfs/sys/zfs_debug.h +++ b/uts/common/fs/zfs/sys/zfs_debug.h @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012 by Delphix. All rights reserved. */ #ifndef _SYS_ZFS_DEBUG_H @@ -75,6 +76,10 @@ extern void zfs_dbgmsg_init(void); extern void zfs_dbgmsg_fini(void); extern void zfs_dbgmsg(const char *fmt, ...); +#ifndef _KERNEL +extern int dprintf_find_string(const char *string); +#endif + #ifdef __cplusplus } #endif diff --git a/uts/common/fs/zfs/zfs_ioctl.c b/uts/common/fs/zfs/zfs_ioctl.c index 213142740162..b45ee7476e59 100644 --- a/uts/common/fs/zfs/zfs_ioctl.c +++ b/uts/common/fs/zfs/zfs_ioctl.c @@ -4335,7 +4335,17 @@ zfs_ioc_pool_reopen(zfs_cmd_t *zc) return (error); spa_vdev_state_enter(spa, SCL_NONE); + + /* + * If a resilver is already in progress then set the + * spa_scrub_reopen flag to B_TRUE so that we don't restart + * the scan as a side effect of the reopen. Otherwise, let + * vdev_open() decided if a resilver is required. + */ + spa->spa_scrub_reopen = dsl_scan_resilvering(spa->spa_dsl_pool); vdev_reopen(spa->spa_root_vdev); + spa->spa_scrub_reopen = B_FALSE; + (void) spa_vdev_state_exit(spa, NULL, 0); spa_close(spa, FTAG); return (0); diff --git a/uts/common/fs/zfs/zio.c b/uts/common/fs/zfs/zio.c index cfb5733f2bd7..33807a1a16fc 100644 --- a/uts/common/fs/zfs/zio.c +++ b/uts/common/fs/zfs/zio.c @@ -125,11 +125,21 @@ zio_init(void) while (p2 & (p2 - 1)) p2 &= p2 - 1; +#ifndef _KERNEL + /* + * If we are using watchpoints, put each buffer on its own page, + * to eliminate the performance overhead of trapping to the + * kernel when modifying a non-watched buffer that shares the + * page with a watched buffer. + */ + if (arc_watch && !IS_P2ALIGNED(size, PAGESIZE)) + continue; +#endif if (size <= 4 * SPA_MINBLOCKSIZE) { align = SPA_MINBLOCKSIZE; - } else if (P2PHASE(size, PAGESIZE) == 0) { + } else if (IS_P2ALIGNED(size, PAGESIZE)) { align = PAGESIZE; - } else if (P2PHASE(size, p2 >> 2) == 0) { + } else if (IS_P2ALIGNED(size, p2 >> 2)) { align = p2 >> 2; }