Merge ^/head r305301 through r305345.
commit 86ea529132
@@ -68,6 +68,9 @@ ${PACKAGE}FILES+= dot1.0
${PACKAGE}FILES+= dot2.0
${PACKAGE}FILES+= dot3.0
${PACKAGE}FILES+= dot4.0
${PACKAGE}FILES+= echo1.0
${PACKAGE}FILES+= echo2.0
${PACKAGE}FILES+= echo3.0
${PACKAGE}FILES+= eval1.0
${PACKAGE}FILES+= eval2.0
${PACKAGE}FILES+= eval3.0
bin/sh/tests/builtins/echo1.0 (new file, 6 lines)
@@ -0,0 +1,6 @@
# $FreeBSD$

# Not specified by POSIX.

[ "`echo -n a b; echo c d; echo e f`" = "a bc d
e f" ]
bin/sh/tests/builtins/echo2.0 (new file, 7 lines)
@@ -0,0 +1,7 @@
# $FreeBSD$

# Not specified by POSIX.

a=`echo -e '\a\b\e\f\n\r\t\v\\\\\0041\c'; echo .`
b=`printf '\a\b\033\f\n\r\t\v\\\\!.'`
[ "$a" = "$b" ]
bin/sh/tests/builtins/echo3.0 (new file, 5 lines)
@@ -0,0 +1,5 @@
# $FreeBSD$

# Not specified by POSIX.

[ "`echo -e 'a\cb' c; echo d`" = "ad" ]
@@ -21,7 +21,7 @@

/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/

@@ -117,7 +117,7 @@ static void
usage(void)
{
(void) fprintf(stderr,
"Usage: %s [-CumMdibcsDvhLXFPA] [-t txg] [-e [-p path...]] "
"Usage: %s [-CumMdibcsDvhLXFPAG] [-t txg] [-e [-p path...]] "
"[-U config] [-I inflight I/Os] [-x dumpdir] poolname [object...]\n"
" %s [-divPA] [-e -p path...] [-U config] dataset "
"[object...]\n"
@@ -178,12 +178,23 @@ usage(void)
(void) fprintf(stderr, " -I <number of inflight I/Os> -- "
"specify the maximum number of "
"checksumming I/Os [default is 200]\n");
(void) fprintf(stderr, " -G dump zfs_dbgmsg buffer before "
"exiting\n");
(void) fprintf(stderr, "Specify an option more than once (e.g. -bb) "
"to make only that option verbose\n");
(void) fprintf(stderr, "Default is to dump everything non-verbosely\n");
exit(1);
}

static void
dump_debug_buffer()
{
if (dump_opt['G']) {
(void) printf("\n");
zfs_dbgmsg_print("zdb");
}
}

/*
* Called for usage errors that are discovered after a call to spa_open(),
* dmu_bonus_hold(), or pool_match(). abort() is called for other errors.
@@ -200,6 +211,8 @@ fatal(const char *fmt, ...)
va_end(ap);
(void) fprintf(stderr, "\n");

dump_debug_buffer();

exit(1);
}

@@ -1289,7 +1302,7 @@ visit_indirect(spa_t *spa, const dnode_phys_t *dnp,
}
if (!err)
ASSERT3U(fill, ==, BP_GET_FILL(bp));
(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);
}

return (err);
@@ -3103,8 +3116,10 @@ dump_zpool(spa_t *spa)
if (dump_opt['h'])
dump_history(spa);

if (rc != 0)
if (rc != 0) {
dump_debug_buffer();
exit(rc);
}
}

#define ZDB_FLAG_CHECKSUM 0x0001
@@ -3575,7 +3590,7 @@ main(int argc, char **argv)
spa_config_path = spa_config_path_env;

while ((c = getopt(argc, argv,
"bcdhilmMI:suCDRSAFLXx:evp:t:U:P")) != -1) {
"bcdhilmMI:suCDRSAFLXx:evp:t:U:PG")) != -1) {
switch (c) {
case 'b':
case 'c':
@@ -3591,6 +3606,7 @@ main(int argc, char **argv)
case 'M':
case 'R':
case 'S':
case 'G':
dump_opt[c]++;
dump_all = 0;
break;
@@ -3826,6 +3842,8 @@ main(int argc, char **argv)
fuid_table_destroy();
sa_loaded = B_FALSE;

dump_debug_buffer();

libzfs_fini(g_zfs);
kernel_fini();
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
@@ -189,6 +189,7 @@ extern uint64_t metaslab_gang_bang;
extern uint64_t metaslab_df_alloc_threshold;
extern uint64_t zfs_deadman_synctime_ms;
extern int metaslab_preload_limit;
extern boolean_t zfs_compressed_arc_enabled;

static ztest_shared_opts_t *ztest_shared_opts;
static ztest_shared_opts_t ztest_opts;
@@ -4792,7 +4793,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
char path0[MAXPATHLEN];
char pathrand[MAXPATHLEN];
size_t fsize;
int bshift = SPA_OLD_MAXBLOCKSHIFT + 2;	/* don't scrog all labels */
int bshift = SPA_MAXBLOCKSHIFT + 2;
int iters = 1000;
int maxfaults;
int mirror_save;
@@ -4953,11 +4954,58 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
fsize = lseek(fd, 0, SEEK_END);

while (--iters != 0) {
/*
* The offset must be chosen carefully to ensure that
* we do not inject a given logical block with errors
* on two different leaf devices, because ZFS can not
* tolerate that (if maxfaults==1).
*
* We divide each leaf into chunks of size
* (# leaves * SPA_MAXBLOCKSIZE * 4).  Within each chunk
* there is a series of ranges to which we can inject errors.
* Each range can accept errors on only a single leaf vdev.
* The error injection ranges are separated by ranges
* which we will not inject errors on any device (DMZs).
* Each DMZ must be large enough such that a single block
* can not straddle it, so that a single block can not be
* a target in two different injection ranges (on different
* leaf vdevs).
*
* For example, with 3 leaves, each chunk looks like:
*    0 to  32M: injection range for leaf 0
*  32M to  64M: DMZ - no injection allowed
*  64M to  96M: injection range for leaf 1
*  96M to 128M: DMZ - no injection allowed
* 128M to 160M: injection range for leaf 2
* 160M to 192M: DMZ - no injection allowed
*/
offset = ztest_random(fsize / (leaves << bshift)) *
(leaves << bshift) + (leaf << bshift) +
(ztest_random(1ULL << (bshift - 1)) & -8ULL);

if (offset >= fsize)
/*
* Only allow damage to the labels at one end of the vdev.
*
* If all labels are damaged, the device will be totally
* inaccessible, which will result in loss of data,
* because we also damage (parts of) the other side of
* the mirror/raidz.
*
* Additionally, we will always have both an even and an
* odd label, so that we can handle crashes in the
* middle of vdev_config_sync().
*/
if ((leaf & 1) == 0 && offset < VDEV_LABEL_START_SIZE)
continue;

/*
* The two end labels are stored at the "end" of the disk, but
* the end of the disk (vdev_psize) is aligned to
* sizeof (vdev_label_t).
*/
uint64_t psize = P2ALIGN(fsize, sizeof (vdev_label_t));
if ((leaf & 1) == 1 &&
offset + sizeof (bad) > psize - VDEV_LABEL_END_SIZE)
continue;

VERIFY(mutex_lock(&ztest_vdev_lock) == 0);
@@ -5021,9 +5069,14 @@ ztest_ddt_repair(ztest_ds_t *zd, uint64_t id)
return;
}

dmu_objset_stats_t dds;
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);

object = od[0].od_object;
blocksize = od[0].od_blocksize;
pattern = zs->zs_guid ^ dmu_objset_fsid_guid(os);
pattern = zs->zs_guid ^ dds.dds_guid;

ASSERT(object != 0);

@@ -5355,6 +5408,12 @@ ztest_resume_thread(void *arg)
if (spa_suspended(spa))
ztest_resume(spa);
(void) poll(NULL, 0, 100);

/*
* Periodically change the zfs_compressed_arc_enabled setting.
*/
if (ztest_random(10) == 0)
zfs_compressed_arc_enabled = ztest_random(2);
}
return (NULL);
}
@@ -5620,9 +5679,13 @@ ztest_run(ztest_shared_t *zs)
metaslab_preload_limit = ztest_random(20) + 1;
ztest_spa = spa;

dmu_objset_stats_t dds;
VERIFY0(dmu_objset_own(ztest_opts.zo_pool,
DMU_OST_ANY, B_TRUE, FTAG, &os));
zs->zs_guid = dmu_objset_fsid_guid(os);
dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
dmu_objset_fast_stat(os, &dds);
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
zs->zs_guid = dds.dds_guid;
dmu_objset_disown(os, FTAG);

spa->spa_dedup_ditto = 2 * ZIO_DEDUPDITTO_MIN;
@@ -102,6 +102,8 @@ read_client_conf(void)
[top_level_config.requested_option_count++] = DHO_HOST_NAME;
top_level_config.requested_options
[top_level_config.requested_option_count++] = DHO_DOMAIN_SEARCH;
top_level_config.requested_options
[top_level_config.requested_option_count++] = DHO_INTERFACE_MTU;

if ((cfile = fopen(path_dhclient_conf, "r")) != NULL) {
do {
@@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include "privsep.h"

#include <sys/capsicum.h>
#include <sys/endian.h>

#include <net80211/ieee80211_freebsd.h>

@@ -132,6 +133,9 @@ int fork_privchld(int, int);
((a) > 0 ? (1 + (((a) - 1) | (sizeof(long) - 1))) : sizeof(long))
#define ADVANCE(x, n) (x += ROUNDUP((n)->sa_len))

/* Minimum MTU is 68 as per RFC791, p. 24 */
#define MIN_MTU 68

static time_t scripttime;

int
@@ -798,9 +802,20 @@ dhcpack(struct packet *packet)
void
bind_lease(struct interface_info *ip)
{
struct option_data *opt;

/* Remember the medium. */
ip->client->new->medium = ip->client->medium;

opt = &ip->client->new->options[DHO_INTERFACE_MTU];
if (opt->len == sizeof(u_int16_t)) {
u_int16_t mtu = be16dec(opt->data);
if (mtu < MIN_MTU)
warning("mtu size %u < %d: ignored", (unsigned)mtu, MIN_MTU);
else
interface_set_mtu_unpriv(privfd, mtu);
}

/* Write out the new lease. */
write_client_lease(ip, ip->client->new, 0);
@@ -319,6 +319,8 @@ void cancel_timeout(void (*)(void *), void *);
void add_protocol(char *, int, void (*)(struct protocol *), void *);
void remove_protocol(struct protocol *);
int interface_link_status(char *);
void interface_set_mtu_unpriv(int, u_int16_t);
void interface_set_mtu_priv(char *, u_int16_t);

/* hash.c */
struct hash_table *new_hash(void);
@@ -43,6 +43,7 @@
__FBSDID("$FreeBSD$");

#include "dhcpd.h"
#include "privsep.h"

#include <sys/ioctl.h>

@@ -501,3 +502,46 @@ interface_link_status(char *ifname)
}
return (1);
}

void
interface_set_mtu_unpriv(int privfd, u_int16_t mtu)
{
struct imsg_hdr hdr;
struct buf *buf;
int errs = 0;

hdr.code = IMSG_SET_INTERFACE_MTU;
hdr.len = sizeof(hdr) +
sizeof(u_int16_t);

if ((buf = buf_open(hdr.len)) == NULL)
error("buf_open: %m");

errs += buf_add(buf, &hdr, sizeof(hdr));
errs += buf_add(buf, &mtu, sizeof(mtu));
if (errs)
error("buf_add: %m");

if (buf_close(privfd, buf) == -1)
error("buf_close: %m");
}

void
interface_set_mtu_priv(char *ifname, u_int16_t mtu)
{
struct ifreq ifr;
int sock;

if ((sock = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
error("Can't create socket");

memset(&ifr, 0, sizeof(ifr));

strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
ifr.ifr_mtu = mtu;

if (ioctl(sock, SIOCSIFMTU, &ifr) == -1)
warning("SIOCSIFMTU failed (%d): %s", mtu,
strerror(errno));
close(sock);
}
@@ -111,6 +111,7 @@ dispatch_imsg(struct interface_info *ifi, int fd)
struct client_lease lease;
int ret, i, optlen;
struct buf *buf;
u_int16_t mtu;

buf_read(fd, &hdr, sizeof(hdr));

@@ -235,6 +236,13 @@ dispatch_imsg(struct interface_info *ifi, int fd)
case IMSG_SEND_PACKET:
send_packet_priv(ifi, &hdr, fd);
break;
case IMSG_SET_INTERFACE_MTU:
if (hdr.len < sizeof(hdr) + sizeof(u_int16_t))
error("corrupted message received");

buf_read(fd, &mtu, sizeof(u_int16_t));
interface_set_mtu_priv(ifi->name, mtu);
break;
default:
error("received unknown message, code %d", hdr.code);
}
@@ -36,7 +36,8 @@ enum imsg_code {
IMSG_SCRIPT_WRITE_PARAMS,
IMSG_SCRIPT_GO,
IMSG_SCRIPT_GO_RET,
IMSG_SEND_PACKET
IMSG_SEND_PACKET,
IMSG_SET_INTERFACE_MTU,
};

struct imsg_hdr {
@@ -5178,6 +5178,14 @@ pmap_is_referenced(vm_page_t m)
* XXX: The exact number of bits to check and clear is a matter that
* should be tested and standardized at some point in the future for
* optimal aging of shared pages.
*
* As an optimization, update the page's dirty field if a modified bit is
* found while counting reference bits.  This opportunistic update can be
* performed at low cost and can eliminate the need for some future calls
* to pmap_is_modified().  However, since this function stops after
* finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some
* dirty pages.  Those dirty pages will only be detected by a future call
* to pmap_is_modified().
*/
int
pmap_ts_referenced(vm_page_t m)
@@ -5186,7 +5194,7 @@ pmap_ts_referenced(vm_page_t m)
pv_entry_t pv, pvf;
pmap_t pmap;
pt1_entry_t *pte1p, opte1;
pt2_entry_t *pte2p;
pt2_entry_t *pte2p, opte2;
vm_paddr_t pa;
int rtval = 0;

@@ -5205,6 +5213,14 @@ pmap_ts_referenced(vm_page_t m)
PMAP_LOCK(pmap);
pte1p = pmap_pte1(pmap, pv->pv_va);
opte1 = pte1_load(pte1p);
if (pte1_is_dirty(opte1)) {
/*
* Although "opte1" is mapping a 1MB page, because
* this function is called at a 4KB page granularity,
* we only update the 4KB page under test.
*/
vm_page_dirty(m);
}
if ((opte1 & PTE1_A) != 0) {
/*
* Since this reference bit is shared by 256 4KB pages,
@@ -5253,7 +5269,10 @@ pmap_ts_referenced(vm_page_t m)
("%s: not found a link in page %p's pv list", __func__, m));

pte2p = pmap_pte2_quick(pmap, pv->pv_va);
if ((pte2_load(pte2p) & PTE2_A) != 0) {
opte2 = pte2_load(pte2p);
if (pte2_is_dirty(opte2))
vm_page_dirty(m);
if ((opte2 & PTE2_A) != 0) {
pte2_clear_bit(pte2p, PTE2_A);
pmap_tlb_flush(pmap, pv->pv_va);
rtval++;
@@ -55,7 +55,6 @@ SECTIONS
{
*(.data)
*(.gnu.linkonce.d*)
CONSTRUCTORS
}
.data1 : { *(.data1) }
.got1 : { *(.got1) }
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -130,6 +130,26 @@ const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS] = {
{ zfs_acl_byteswap, "acl" }
};

int
dmu_buf_hold_noread_by_dnode(dnode_t *dn, uint64_t offset,
void *tag, dmu_buf_t **dbp)
{
uint64_t blkid;
dmu_buf_impl_t *db;

blkid = dbuf_whichblock(dn, 0, offset);
rw_enter(&dn->dn_struct_rwlock, RW_READER);
db = dbuf_hold(dn, blkid, tag);
rw_exit(&dn->dn_struct_rwlock);

if (db == NULL) {
*dbp = NULL;
return (SET_ERROR(EIO));
}

*dbp = &db->db;
return (0);
}
int
dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **dbp)
@@ -157,6 +177,29 @@ dmu_buf_hold_noread(objset_t *os, uint64_t object, uint64_t offset,
return (err);
}

int
dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
void *tag, dmu_buf_t **dbp, int flags)
{
int err;
int db_flags = DB_RF_CANFAIL;

if (flags & DMU_READ_NO_PREFETCH)
db_flags |= DB_RF_NOPREFETCH;

err = dmu_buf_hold_noread_by_dnode(dn, offset, tag, dbp);
if (err == 0) {
dmu_buf_impl_t *db = (dmu_buf_impl_t *)(*dbp);
err = dbuf_read(db, NULL, db_flags);
if (err != 0) {
dbuf_rele(db, tag);
*dbp = NULL;
}
}

return (err);
}

int
dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
void *tag, dmu_buf_t **dbp, int flags)
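The hunks above add a dnode-based hold path that skips the objset and object lookup performed by dmu_buf_hold(). A minimal sketch of how a caller that already holds a dnode might use it; the wrapper name and the read of db_data are illustrative assumptions, not part of this commit:

static int
read_first_word(dnode_t *dn, uint64_t offset, uint64_t *valp)
{
	dmu_buf_t *db;
	int err;

	/* Hold and read the block covering 'offset' directly via the dnode. */
	err = dmu_buf_hold_by_dnode(dn, offset, FTAG, &db, DMU_READ_PREFETCH);
	if (err != 0)
		return (err);

	/* Illustrative only: copy out the first word of the buffer. */
	*valp = *(uint64_t *)db->db_data;
	dmu_buf_rele(db, FTAG);
	return (0);
}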
@@ -1407,7 +1450,7 @@ void
dmu_return_arcbuf(arc_buf_t *buf)
{
arc_return_buf(buf, FTAG);
VERIFY(arc_buf_remove_ref(buf, FTAG));
arc_buf_destroy(buf, FTAG);
}

/*
@@ -1763,8 +1806,7 @@ dmu_sync(zio_t *pio, uint64_t txg, dmu_sync_cb_t *done, zgd_t *zgd)

zio_nowait(arc_write(pio, os->os_spa, txg,
bp, dr->dt.dl.dr_data, DBUF_IS_L2CACHEABLE(db),
DBUF_IS_L2COMPRESSIBLE(db), &zp, dmu_sync_ready,
NULL, NULL, dmu_sync_done, dsa,
&zp, dmu_sync_ready, NULL, NULL, dmu_sync_done, dsa,
ZIO_PRIORITY_SYNC_WRITE, ZIO_FLAG_CANFAIL, &zb));

return (0);
@@ -2137,11 +2179,11 @@ dmu_init(void)
xuio_stat_init();
dmu_objset_init();
dnode_init();
dbuf_init();
zfetch_init();
zio_compress_init();
l2arc_init();
arc_init();
dbuf_init();
}

void
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/

#include <sys/dmu.h>
@@ -169,7 +169,7 @@ diff_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
if (err)
break;
}
(void) arc_buf_remove_ref(abuf, &abuf);
arc_buf_destroy(abuf, &abuf);
if (err)
return (err);
/* Don't care about the data blocks */
@@ -316,8 +316,6 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,

if (DMU_OS_IS_L2CACHEABLE(os))
aflags |= ARC_FLAG_L2CACHE;
if (DMU_OS_IS_L2COMPRESSIBLE(os))
aflags |= ARC_FLAG_L2COMPRESS;

dprintf_bp(os->os_rootbp, "reading %s", "");
err = arc_read(NULL, spa, os->os_rootbp,
@@ -334,14 +332,13 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
/* Increase the blocksize if we are permitted. */
if (spa_version(spa) >= SPA_VERSION_USERSPACE &&
arc_buf_size(os->os_phys_buf) < sizeof (objset_phys_t)) {
arc_buf_t *buf = arc_buf_alloc(spa,
arc_buf_t *buf = arc_alloc_buf(spa,
sizeof (objset_phys_t), &os->os_phys_buf,
ARC_BUFC_METADATA);
bzero(buf->b_data, sizeof (objset_phys_t));
bcopy(os->os_phys_buf->b_data, buf->b_data,
arc_buf_size(os->os_phys_buf));
(void) arc_buf_remove_ref(os->os_phys_buf,
&os->os_phys_buf);
arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
os->os_phys_buf = buf;
}

@@ -350,7 +347,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
} else {
int size = spa_version(spa) >= SPA_VERSION_USERSPACE ?
sizeof (objset_phys_t) : OBJSET_OLD_PHYS_SIZE;
os->os_phys_buf = arc_buf_alloc(spa, size,
os->os_phys_buf = arc_alloc_buf(spa, size,
&os->os_phys_buf, ARC_BUFC_METADATA);
os->os_phys = os->os_phys_buf->b_data;
bzero(os->os_phys, size);
@@ -428,8 +425,7 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
if (needlock)
dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
if (err != 0) {
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
&os->os_phys_buf));
arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);
kmem_free(os, sizeof (objset_t));
return (err);
}
@@ -731,7 +727,7 @@ dmu_objset_evict_done(objset_t *os)
}
zil_free(os->os_zil);

VERIFY(arc_buf_remove_ref(os->os_phys_buf, &os->os_phys_buf));
arc_buf_destroy(os->os_phys_buf, &os->os_phys_buf);

/*
* This is a barrier to prevent the objset from going away in
@@ -1128,7 +1124,6 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)

zio = arc_write(pio, os->os_spa, tx->tx_txg,
os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
DMU_OS_IS_L2COMPRESSIBLE(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
@@ -160,11 +160,16 @@ dump_record(dmu_sendarg_t *dsp, void *payload, int payload_len)
fletcher_4_incremental_native(dsp->dsa_drr,
offsetof(dmu_replay_record_t, drr_u.drr_checksum.drr_checksum),
&dsp->dsa_zc);
if (dsp->dsa_drr->drr_type != DRR_BEGIN) {
if (dsp->dsa_drr->drr_type == DRR_BEGIN) {
dsp->dsa_sent_begin = B_TRUE;
} else {
ASSERT(ZIO_CHECKSUM_IS_ZERO(&dsp->dsa_drr->drr_u.
drr_checksum.drr_checksum));
dsp->dsa_drr->drr_u.drr_checksum.drr_checksum = dsp->dsa_zc;
}
if (dsp->dsa_drr->drr_type == DRR_END) {
dsp->dsa_sent_end = B_TRUE;
}
fletcher_4_incremental_native(&dsp->dsa_drr->
drr_u.drr_checksum.drr_checksum,
sizeof (zio_cksum_t), &dsp->dsa_zc);
@@ -634,7 +639,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
if (err != 0)
break;
}
(void) arc_buf_remove_ref(abuf, &abuf);
arc_buf_destroy(abuf, &abuf);
} else if (type == DMU_OT_SA) {
arc_flags_t aflags = ARC_FLAG_WAIT;
arc_buf_t *abuf;
@@ -646,7 +651,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
return (SET_ERROR(EIO));

err = dump_spill(dsa, zb->zb_object, blksz, abuf->b_data);
(void) arc_buf_remove_ref(abuf, &abuf);
arc_buf_destroy(abuf, &abuf);
} else if (backup_do_embed(dsa, bp)) {
/* it's an embedded level-0 block of a regular object */
int blksz = dblkszsec << SPA_MINBLOCKSHIFT;
@@ -670,7 +675,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
&aflags, zb) != 0) {
if (zfs_send_corrupt_data) {
/* Send a block filled with 0x"zfs badd bloc" */
abuf = arc_buf_alloc(spa, blksz, &abuf,
abuf = arc_alloc_buf(spa, blksz, &abuf,
ARC_BUFC_DATA);
uint64_t *ptr;
for (ptr = abuf->b_data;
@@ -700,7 +705,7 @@ do_dump(dmu_sendarg_t *dsa, struct send_block_record *data)
err = dump_write(dsa, type, zb->zb_object,
offset, blksz, bp, abuf->b_data);
}
(void) arc_buf_remove_ref(abuf, &abuf);
arc_buf_destroy(abuf, &abuf);
}

ASSERT(err == 0 || err == EINTR);
@@ -912,6 +917,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *to_ds,
list_remove(&to_ds->ds_sendstreams, dsp);
mutex_exit(&to_ds->ds_sendstream_lock);

VERIFY(err != 0 || (dsp->dsa_sent_begin && dsp->dsa_sent_end));

kmem_free(drr, sizeof (dmu_replay_record_t));
kmem_free(dsp, sizeof (dmu_sendarg_t));

@@ -3106,6 +3113,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
dsl_dataset_phys(origin_head)->ds_flags &=
~DS_FLAG_INCONSISTENT;

drc->drc_newsnapobj =
dsl_dataset_phys(origin_head)->ds_prev_snap_obj;

dsl_dataset_rele(origin_head, FTAG);
dsl_destroy_head_sync_impl(drc->drc_ds, tx);

@@ -3141,8 +3151,9 @@ dmu_recv_end_sync(void *arg, dmu_tx_t *tx)
(void) zap_remove(dp->dp_meta_objset, ds->ds_object,
DS_FIELD_RESUME_TONAME, tx);
}
drc->drc_newsnapobj =
dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
}
drc->drc_newsnapobj = dsl_dataset_phys(drc->drc_ds)->ds_prev_snap_obj;
/*
* Release the hold from dmu_recv_begin.  This must be done before
* we return to open context, so that when we free the dataset's dnode,
@@ -3184,8 +3195,6 @@ static int dmu_recv_end_modified_blocks = 3;
static int
dmu_recv_existing_end(dmu_recv_cookie_t *drc)
{
int error;

#ifdef _KERNEL
/*
* We will be destroying the ds; make sure its origin is unmounted if
@@ -3196,23 +3205,30 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
zfs_destroy_unmount_origin(name);
#endif

error = dsl_sync_task(drc->drc_tofs,
return (dsl_sync_task(drc->drc_tofs,
dmu_recv_end_check, dmu_recv_end_sync, drc,
dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL);

if (error != 0)
dmu_recv_cleanup_ds(drc);
return (error);
dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL));
}

static int
dmu_recv_new_end(dmu_recv_cookie_t *drc)
{
return (dsl_sync_task(drc->drc_tofs,
dmu_recv_end_check, dmu_recv_end_sync, drc,
dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL));
}

int
dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
{
int error;

error = dsl_sync_task(drc->drc_tofs,
dmu_recv_end_check, dmu_recv_end_sync, drc,
dmu_recv_end_modified_blocks, ZFS_SPACE_CHECK_NORMAL);
drc->drc_owner = owner;

if (drc->drc_newfs)
error = dmu_recv_new_end(drc);
else
error = dmu_recv_existing_end(drc);

if (error != 0) {
dmu_recv_cleanup_ds(drc);
@@ -3224,17 +3240,6 @@ dmu_recv_new_end(dmu_recv_cookie_t *drc)
return (error);
}

int
dmu_recv_end(dmu_recv_cookie_t *drc, void *owner)
{
drc->drc_owner = owner;

if (drc->drc_newfs)
return (dmu_recv_new_end(drc));
else
return (dmu_recv_existing_end(drc));
}

/*
* Return TRUE if this objset is currently being received into.
*/
@@ -380,7 +380,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
}

if (buf)
(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);

post:
if (err == 0 && (td->td_flags & TRAVERSE_POST))
@@ -595,7 +595,7 @@ traverse_impl(spa_t *spa, dsl_dataset_t *ds, uint64_t objset, blkptr_t *rootbp,

osp = buf->b_data;
traverse_zil(&td, &osp->os_zil_header);
(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);
}

if (!(flags & TRAVERSE_PREFETCH_DATA) ||
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/

@@ -808,15 +808,14 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
* access the name in this fat-zap so that we'll check
* for i/o errors to the leaf blocks, etc.
*/
err = zap_lookup(dn->dn_objset, dn->dn_object, name,
8, 0, NULL);
err = zap_lookup_by_dnode(dn, name, 8, 0, NULL);
if (err == EIO) {
tx->tx_err = err;
return;
}
}

err = zap_count_write(dn->dn_objset, dn->dn_object, name, add,
err = zap_count_write_by_dnode(dn, name, add,
&txh->txh_space_towrite, &txh->txh_space_tooverwrite);

/*
@@ -512,7 +512,7 @@ dnode_destroy(dnode_t *dn)
}
if (dn->dn_bonus != NULL) {
mutex_enter(&dn->dn_bonus->db_mtx);
dbuf_evict(dn->dn_bonus);
dbuf_destroy(dn->dn_bonus);
dn->dn_bonus = NULL;
}
dn->dn_zio = NULL;
@@ -413,7 +413,7 @@ dnode_evict_dbufs(dnode_t *dn)
avl_insert_here(&dn->dn_dbufs, &db_marker, db,
AVL_BEFORE);

dbuf_clear(db);
dbuf_destroy(db);

db_next = AVL_NEXT(&dn->dn_dbufs, &db_marker);
avl_remove(&dn->dn_dbufs, &db_marker);
@@ -435,7 +435,7 @@ dnode_evict_bonus(dnode_t *dn)
if (dn->dn_bonus != NULL) {
if (refcount_is_zero(&dn->dn_bonus->db_holds)) {
mutex_enter(&dn->dn_bonus->db_mtx);
dbuf_evict(dn->dn_bonus);
dbuf_destroy(dn->dn_bonus);
dn->dn_bonus = NULL;
} else {
dn->dn_bonus->db_pending_evict = TRUE;
@@ -1060,19 +1060,6 @@ dsl_dataset_get_blkptr(dsl_dataset_t *ds)
return (&dsl_dataset_phys(ds)->ds_bp);
}

void
dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx)
{
ASSERT(dmu_tx_is_syncing(tx));
/* If it's the meta-objset, set dp_meta_rootbp */
if (ds == NULL) {
tx->tx_pool->dp_meta_rootbp = *bp;
} else {
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_bp = *bp;
}
}

spa_t *
dsl_dataset_get_spa(dsl_dataset_t *ds)
{
@@ -679,7 +679,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
dsl_scan_visitbp(cbp, &czb, dnp,
ds, scn, ostype, tx);
}
(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);
} else if (BP_GET_TYPE(bp) == DMU_OT_DNODE) {
arc_flags_t flags = ARC_FLAG_WAIT;
dnode_phys_t *cdnp;
@@ -705,7 +705,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
cdnp, zb->zb_blkid * epb + i, tx);
}

(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);
} else if (BP_GET_TYPE(bp) == DMU_OT_OBJSET) {
arc_flags_t flags = ARC_FLAG_WAIT;
objset_phys_t *osp;
@@ -737,7 +737,7 @@ dsl_scan_recurse(dsl_scan_t *scn, dsl_dataset_t *ds, dmu_objset_type_t ostype,
&osp->os_userused_dnode,
DMU_USERUSED_OBJECT, tx);
}
(void) arc_buf_remove_ref(buf, &buf);
arc_buf_destroy(buf, &buf);
}

return (0);
@@ -38,17 +38,8 @@
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, metaslab, CTLFLAG_RW, 0, "ZFS metaslab");

/*
* Allow allocations to switch to gang blocks quickly. We do this to
* avoid having to load lots of space_maps in a given txg. There are,
* however, some cases where we want to avoid "fast" ganging and instead
* we want to do an exhaustive search of all metaslabs on this device.
* Currently we don't allow any gang, slog, or dump device related allocations
* to "fast" gang.
*/
#define	CAN_FASTGANG(flags) \
(!((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER | \
METASLAB_GANG_AVOID)))
#define	GANG_ALLOCATION(flags) \
((flags) & (METASLAB_GANG_CHILD | METASLAB_GANG_HEADER))

#define	METASLAB_WEIGHT_PRIMARY	(1ULL << 63)
#define	METASLAB_WEIGHT_SECONDARY	(1ULL << 62)
@@ -256,6 +247,8 @@ metaslab_class_create(spa_t *spa, metaslab_ops_t *ops)
mc->mc_spa = spa;
mc->mc_rotor = NULL;
mc->mc_ops = ops;
mutex_init(&mc->mc_lock, NULL, MUTEX_DEFAULT, NULL);
refcount_create_tracked(&mc->mc_alloc_slots);

return (mc);
}
@@ -269,6 +262,8 @@ metaslab_class_destroy(metaslab_class_t *mc)
ASSERT(mc->mc_space == 0);
ASSERT(mc->mc_dspace == 0);

refcount_destroy(&mc->mc_alloc_slots);
mutex_destroy(&mc->mc_lock);
kmem_free(mc, sizeof (metaslab_class_t));
}

@@ -468,7 +463,13 @@ metaslab_class_expandable_space(metaslab_class_t *mc)
continue;
}

space += tvd->vdev_max_asize - tvd->vdev_asize;
/*
* Calculate if we have enough space to add additional
* metaslabs. We report the expandable space in terms
* of the metaslab size since that's the unit of expansion.
*/
space += P2ALIGN(tvd->vdev_max_asize - tvd->vdev_asize,
1ULL << tvd->vdev_ms_shift);
}
spa_config_exit(mc->mc_spa, SCL_VDEV, FTAG);
return (space);
@@ -506,9 +507,10 @@ metaslab_compare(const void *x1, const void *x2)
/*
* Update the allocatable flag and the metaslab group's capacity.
* The allocatable flag is set to true if the capacity is below
* the zfs_mg_noalloc_threshold. If a metaslab group transitions
* from allocatable to non-allocatable or vice versa then the metaslab
* group's class is updated to reflect the transition.
* the zfs_mg_noalloc_threshold or has a fragmentation value that is
* greater than zfs_mg_fragmentation_threshold. If a metaslab group
* transitions from allocatable to non-allocatable or vice versa then the
* metaslab group's class is updated to reflect the transition.
*/
static void
metaslab_group_alloc_update(metaslab_group_t *mg)
@@ -517,22 +519,45 @@ metaslab_group_alloc_update(metaslab_group_t *mg)
metaslab_class_t *mc = mg->mg_class;
vdev_stat_t *vs = &vd->vdev_stat;
boolean_t was_allocatable;
boolean_t was_initialized;

ASSERT(vd == vd->vdev_top);

mutex_enter(&mg->mg_lock);
was_allocatable = mg->mg_allocatable;
was_initialized = mg->mg_initialized;

mg->mg_free_capacity = ((vs->vs_space - vs->vs_alloc) * 100) /
(vs->vs_space + 1);

mutex_enter(&mc->mc_lock);

/*
* If the metaslab group was just added then it won't
* have any space until we finish syncing out this txg.
* At that point we will consider it initialized and available
* for allocations.  We also don't consider non-activated
* metaslab groups (e.g. vdevs that are in the middle of being removed)
* to be initialized, because they can't be used for allocation.
*/
mg->mg_initialized = metaslab_group_initialized(mg);
if (!was_initialized && mg->mg_initialized) {
mc->mc_groups++;
} else if (was_initialized && !mg->mg_initialized) {
ASSERT3U(mc->mc_groups, >, 0);
mc->mc_groups--;
}
if (mg->mg_initialized)
mg->mg_no_free_space = B_FALSE;

/*
* A metaslab group is considered allocatable if it has plenty
* of free space or is not heavily fragmented. We only take
* fragmentation into account if the metaslab group has a valid
* fragmentation metric (i.e. a value between 0 and 100).
*/
mg->mg_allocatable = (mg->mg_free_capacity > zfs_mg_noalloc_threshold &&
mg->mg_allocatable = (mg->mg_activation_count > 0 &&
mg->mg_free_capacity > zfs_mg_noalloc_threshold &&
(mg->mg_fragmentation == ZFS_FRAG_INVALID ||
mg->mg_fragmentation <= zfs_mg_fragmentation_threshold));

@@ -555,6 +580,7 @@ metaslab_group_alloc_update(metaslab_group_t *mg)
mc->mc_alloc_groups--;
else if (!was_allocatable && mg->mg_allocatable)
mc->mc_alloc_groups++;
mutex_exit(&mc->mc_lock);

mutex_exit(&mg->mg_lock);
}
@@ -571,6 +597,9 @@ metaslab_group_create(metaslab_class_t *mc, vdev_t *vd)
mg->mg_vd = vd;
mg->mg_class = mc;
mg->mg_activation_count = 0;
mg->mg_initialized = B_FALSE;
mg->mg_no_free_space = B_TRUE;
refcount_create_tracked(&mg->mg_alloc_queue_depth);

mg->mg_taskq = taskq_create("metaslab_group_taskq", metaslab_load_pct,
minclsyspri, 10, INT_MAX, TASKQ_THREADS_CPU_PCT);
@@ -593,6 +622,7 @@ metaslab_group_destroy(metaslab_group_t *mg)
taskq_destroy(mg->mg_taskq);
avl_destroy(&mg->mg_metaslab_tree);
mutex_destroy(&mg->mg_lock);
refcount_destroy(&mg->mg_alloc_queue_depth);
kmem_free(mg, sizeof (metaslab_group_t));
}

@@ -664,6 +694,15 @@ metaslab_group_passivate(metaslab_group_t *mg)
metaslab_class_minblocksize_update(mc);
}

boolean_t
metaslab_group_initialized(metaslab_group_t *mg)
{
vdev_t *vd = mg->mg_vd;
vdev_stat_t *vs = &vd->vdev_stat;

return (vs->vs_space != 0 && mg->mg_activation_count > 0);
}

uint64_t
metaslab_group_get_space(metaslab_group_t *mg)
{
@@ -833,30 +872,97 @@ metaslab_group_fragmentation(metaslab_group_t *mg)
* group should avoid allocations if its free capacity is less than the
* zfs_mg_noalloc_threshold or its fragmentation metric is greater than
* zfs_mg_fragmentation_threshold and there is at least one metaslab group
* that can still handle allocations.
* that can still handle allocations. If the allocation throttle is enabled
* then we skip allocations to devices that have reached their maximum
* allocation queue depth unless the selected metaslab group is the only
* eligible group remaining.
*/
static boolean_t
metaslab_group_allocatable(metaslab_group_t *mg)
metaslab_group_allocatable(metaslab_group_t *mg, metaslab_group_t *rotor,
uint64_t psize)
{
vdev_t *vd = mg->mg_vd;
spa_t *spa = vd->vdev_spa;
spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_class_t *mc = mg->mg_class;

/*
* We use two key metrics to determine if a metaslab group is
* considered allocatable -- free space and fragmentation. If
* the free space is greater than the free space threshold and
* the fragmentation is less than the fragmentation threshold then
* consider the group allocatable. There are two case when we will
* not consider these key metrics. The first is if the group is
* associated with a slog device and the second is if all groups
* in this metaslab class have already been consider ineligible
* We can only consider skipping this metaslab group if it's
* in the normal metaslab class and there are other metaslab
* groups to select from. Otherwise, we always consider it eligible
* for allocations.
*/
return ((mg->mg_free_capacity > zfs_mg_noalloc_threshold &&
(mg->mg_fragmentation == ZFS_FRAG_INVALID ||
mg->mg_fragmentation <= zfs_mg_fragmentation_threshold)) ||
mc != spa_normal_class(spa) || mc->mc_alloc_groups == 0);
if (mc != spa_normal_class(spa) || mc->mc_groups <= 1)
return (B_TRUE);

/*
* If the metaslab group's mg_allocatable flag is set (see comments
* in metaslab_group_alloc_update() for more information) and
* the allocation throttle is disabled then allow allocations to this
* device. However, if the allocation throttle is enabled then
* check if we have reached our allocation limit (mg_alloc_queue_depth)
* to determine if we should allow allocations to this metaslab group.
* If all metaslab groups are no longer considered allocatable
* (mc_alloc_groups == 0) or we're trying to allocate the smallest
* gang block size then we allow allocations on this metaslab group
* regardless of the mg_allocatable or throttle settings.
*/
if (mg->mg_allocatable) {
metaslab_group_t *mgp;
int64_t qdepth;
uint64_t qmax = mg->mg_max_alloc_queue_depth;

if (!mc->mc_alloc_throttle_enabled)
return (B_TRUE);

/*
* If this metaslab group does not have any free space, then
* there is no point in looking further.
*/
if (mg->mg_no_free_space)
return (B_FALSE);

qdepth = refcount_count(&mg->mg_alloc_queue_depth);

/*
* If this metaslab group is below its qmax or it's
* the only allocatable metasable group, then attempt
* to allocate from it.
*/
if (qdepth < qmax || mc->mc_alloc_groups == 1)
return (B_TRUE);
ASSERT3U(mc->mc_alloc_groups, >, 1);

/*
* Since this metaslab group is at or over its qmax, we
* need to determine if there are metaslab groups after this
* one that might be able to handle this allocation. This is
* racy since we can't hold the locks for all metaslab
* groups at the same time when we make this check.
*/
for (mgp = mg->mg_next; mgp != rotor; mgp = mgp->mg_next) {
qmax = mgp->mg_max_alloc_queue_depth;

qdepth = refcount_count(&mgp->mg_alloc_queue_depth);

/*
* If there is another metaslab group that
* might be able to handle the allocation, then
* we return false so that we skip this group.
*/
if (qdepth < qmax && !mgp->mg_no_free_space)
return (B_FALSE);
}

/*
* We didn't find another group to handle the allocation
* so we can't skip this metaslab group even though
* we are at or over our qmax.
*/
return (B_TRUE);

} else if (mc->mc_alloc_groups == 0 || psize == SPA_MINBLOCKSIZE) {
return (B_TRUE);
}
return (B_FALSE);
}

/*
@@ -2124,8 +2230,57 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
return (0);
}

/*
* ==========================================================================
* Metaslab block operations
* ==========================================================================
*/

static void
metaslab_group_alloc_increment(spa_t *spa, uint64_t vdev, void *tag, int flags)
{
if (!(flags & METASLAB_ASYNC_ALLOC) ||
flags & METASLAB_DONT_THROTTLE)
return;

metaslab_group_t *mg = vdev_lookup_top(spa, vdev)->vdev_mg;
if (!mg->mg_class->mc_alloc_throttle_enabled)
return;

(void) refcount_add(&mg->mg_alloc_queue_depth, tag);
}

void
metaslab_group_alloc_decrement(spa_t *spa, uint64_t vdev, void *tag, int flags)
{
if (!(flags & METASLAB_ASYNC_ALLOC) ||
flags & METASLAB_DONT_THROTTLE)
return;

metaslab_group_t *mg = vdev_lookup_top(spa, vdev)->vdev_mg;
if (!mg->mg_class->mc_alloc_throttle_enabled)
return;

(void) refcount_remove(&mg->mg_alloc_queue_depth, tag);
}

void
metaslab_group_alloc_verify(spa_t *spa, const blkptr_t *bp, void *tag)
{
#ifdef ZFS_DEBUG
const dva_t *dva = bp->blk_dva;
int ndvas = BP_GET_NDVAS(bp);

for (int d = 0; d < ndvas; d++) {
uint64_t vdev = DVA_GET_VDEV(&dva[d]);
metaslab_group_t *mg = vdev_lookup_top(spa, vdev)->vdev_mg;
VERIFY(refcount_not_held(&mg->mg_alloc_queue_depth, tag));
}
#endif
}

static uint64_t
metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
metaslab_group_alloc(metaslab_group_t *mg, uint64_t asize,
uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
{
spa_t *spa = mg->mg_vd->vdev_spa;
@@ -2152,10 +2307,10 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
if (msp->ms_weight < asize) {
spa_dbgmsg(spa, "%s: failed to meet weight "
"requirement: vdev %llu, txg %llu, mg %p, "
"msp %p, psize %llu, asize %llu, "
"msp %p, asize %llu, "
"weight %llu", spa_name(spa),
mg->mg_vd->vdev_id, txg,
mg, msp, psize, asize, msp->ms_weight);
mg, msp, asize, msp->ms_weight);
mutex_exit(&mg->mg_lock);
return (-1ULL);
}
@@ -2237,7 +2392,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
msp->ms_access_txg = txg + metaslab_unload_delay;

mutex_exit(&msp->ms_lock);

return (offset);
}

@@ -2254,7 +2408,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
int all_zero;
int zio_lock = B_FALSE;
boolean_t allocatable;
uint64_t offset = -1ULL;
uint64_t asize;
uint64_t distance;

@@ -2324,7 +2477,6 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
all_zero = B_TRUE;
do {
ASSERT(mg->mg_activation_count == 1);

vd = mg->mg_vd;

/*
@@ -2340,24 +2492,23 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,

/*
* Determine if the selected metaslab group is eligible
* for allocations. If we're ganging or have requested
* an allocation for the smallest gang block size
* then we don't want to avoid allocating to the this
* metaslab group. If we're in this condition we should
* try to allocate from any device possible so that we
* don't inadvertently return ENOSPC and suspend the pool
* for allocations. If we're ganging then don't allow
* this metaslab group to skip allocations since that would
* inadvertently return ENOSPC and suspend the pool
* even though space is still available.
*/
if (allocatable && CAN_FASTGANG(flags) &&
psize > SPA_GANGBLOCKSIZE)
allocatable = metaslab_group_allocatable(mg);
if (allocatable && !GANG_ALLOCATION(flags) && !zio_lock) {
allocatable = metaslab_group_allocatable(mg, rotor,
psize);
}

if (!allocatable)
goto next;

ASSERT(mg->mg_initialized);

/*
* Avoid writing single-copy data to a failing vdev
* unless the user instructs us that it is okay.
* Avoid writing single-copy data to a failing vdev.
*/
if ((vd->vdev_stat.vs_write_errors > 0 ||
vd->vdev_state < VDEV_STATE_HEALTHY) &&
@@ -2377,8 +2528,32 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
asize = vdev_psize_to_asize(vd, psize);
ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);

offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
dva, d);
uint64_t offset = metaslab_group_alloc(mg, asize, txg,
distance, dva, d);

mutex_enter(&mg->mg_lock);
if (offset == -1ULL) {
mg->mg_failed_allocations++;
if (asize == SPA_GANGBLOCKSIZE) {
/*
* This metaslab group was unable to allocate
* the minimum gang block size so it must be
* out of space. We must notify the allocation
* throttle to start skipping allocation
* attempts to this metaslab group until more
* space becomes available.
*
* Note: this failure cannot be caused by the
* allocation throttle since the allocation
* throttle is only responsible for skipping
* devices and not failing block allocations.
*/
mg->mg_no_free_space = B_TRUE;
}
}
mg->mg_allocations++;
mutex_exit(&mg->mg_lock);

if (offset != -1ULL) {
/*
* If we've just selected this metaslab group,
@@ -2559,9 +2734,57 @@ metaslab_claim_dva(spa_t *spa, const dva_t *dva, uint64_t txg)
return (0);
}

/*
* Reserve some allocation slots. The reservation system must be called
* before we call into the allocator. If there aren't any available slots
* then the I/O will be throttled until an I/O completes and its slots are
* freed up. The function returns true if it was successful in placing
* the reservation.
*/
boolean_t
metaslab_class_throttle_reserve(metaslab_class_t *mc, int slots, zio_t *zio,
int flags)
{
uint64_t available_slots = 0;
boolean_t slot_reserved = B_FALSE;

ASSERT(mc->mc_alloc_throttle_enabled);
mutex_enter(&mc->mc_lock);

uint64_t reserved_slots = refcount_count(&mc->mc_alloc_slots);
if (reserved_slots < mc->mc_alloc_max_slots)
available_slots = mc->mc_alloc_max_slots - reserved_slots;

if (slots <= available_slots || GANG_ALLOCATION(flags)) {
/*
* We reserve the slots individually so that we can unreserve
* them individually when an I/O completes.
*/
for (int d = 0; d < slots; d++) {
reserved_slots = refcount_add(&mc->mc_alloc_slots, zio);
}
zio->io_flags |= ZIO_FLAG_IO_ALLOCATING;
slot_reserved = B_TRUE;
}

mutex_exit(&mc->mc_lock);
return (slot_reserved);
}

void
metaslab_class_throttle_unreserve(metaslab_class_t *mc, int slots, zio_t *zio)
{
ASSERT(mc->mc_alloc_throttle_enabled);
mutex_enter(&mc->mc_lock);
for (int d = 0; d < slots; d++) {
(void) refcount_remove(&mc->mc_alloc_slots, zio);
}
mutex_exit(&mc->mc_lock);
}

int
metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
int ndvas, uint64_t txg, blkptr_t *hintbp, int flags)
int ndvas, uint64_t txg, blkptr_t *hintbp, int flags, zio_t *zio)
{
dva_t *dva = bp->blk_dva;
dva_t *hintdva = hintbp->blk_dva;
@@ -2587,11 +2810,21 @@ metaslab_alloc(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
if (error != 0) {
for (d--; d >= 0; d--) {
metaslab_free_dva(spa, &dva[d], txg, B_TRUE);
metaslab_group_alloc_decrement(spa,
DVA_GET_VDEV(&dva[d]), zio, flags);
bzero(&dva[d], sizeof (dva_t));
}
spa_config_exit(spa, SCL_ALLOC, FTAG);
return (error);
} else {
/*
* Update the metaslab group's queue depth
* based on the newly allocated dva.
*/
metaslab_group_alloc_increment(spa,
DVA_GET_VDEV(&dva[d]), zio, flags);
}

}
ASSERT(error == 0);
ASSERT(BP_GET_NDVAS(bp) == ndvas);
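The throttle reserve/unreserve pair and the new zio argument to metaslab_alloc() above are driven from the zio pipeline, which is not shown in this view. A hedged sketch of the intended pairing, built only from the signatures in this file; the wrapper name and the EAGAIN handling are assumptions for illustration:

static int
alloc_throttled(spa_t *spa, metaslab_class_t *mc, uint64_t psize, blkptr_t *bp,
    blkptr_t *hintbp, int ndvas, uint64_t txg, zio_t *zio, int flags)
{
	int error;

	/* One reservation slot per DVA; bail out if none are available. */
	if (!metaslab_class_throttle_reserve(mc, ndvas, zio, flags))
		return (SET_ERROR(EAGAIN));

	error = metaslab_alloc(spa, mc, psize, bp, ndvas, txg, hintbp,
	    flags, zio);

	/* On failure, give the reserved slots back immediately. */
	if (error != 0)
		metaslab_class_throttle_unreserve(mc, ndvas, zio);
	return (error);
}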
@@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
*/

#include <sys/zfs_context.h>
@@ -72,6 +72,13 @@ refcount_create(refcount_t *rc)
rc->rc_tracked = reference_tracking_enable;
}

void
refcount_create_tracked(refcount_t *rc)
{
refcount_create(rc);
rc->rc_tracked = B_TRUE;
}

void
refcount_create_untracked(refcount_t *rc)
{
@@ -231,4 +238,84 @@ refcount_transfer(refcount_t *dst, refcount_t *src)
list_destroy(&removed);
}

void
refcount_transfer_ownership(refcount_t *rc, void *current_holder,
void *new_holder)
{
reference_t *ref;
boolean_t found = B_FALSE;

mutex_enter(&rc->rc_mtx);
if (!rc->rc_tracked) {
mutex_exit(&rc->rc_mtx);
return;
}

for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == current_holder) {
ref->ref_holder = new_holder;
found = B_TRUE;
break;
}
}
ASSERT(found);
mutex_exit(&rc->rc_mtx);
}

/*
* If tracking is enabled, return true if a reference exists that matches
* the "holder" tag. If tracking is disabled, then return true if a reference
* might be held.
*/
boolean_t
refcount_held(refcount_t *rc, void *holder)
{
reference_t *ref;

mutex_enter(&rc->rc_mtx);

if (!rc->rc_tracked) {
mutex_exit(&rc->rc_mtx);
return (rc->rc_count > 0);
}

for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == holder) {
mutex_exit(&rc->rc_mtx);
return (B_TRUE);
}
}
mutex_exit(&rc->rc_mtx);
return (B_FALSE);
}

/*
* If tracking is enabled, return true if a reference does not exist that
* matches the "holder" tag. If tracking is disabled, always return true
* since the reference might not be held.
*/
boolean_t
refcount_not_held(refcount_t *rc, void *holder)
{
reference_t *ref;

mutex_enter(&rc->rc_mtx);

if (!rc->rc_tracked) {
mutex_exit(&rc->rc_mtx);
return (B_TRUE);
}

for (ref = list_head(&rc->rc_list); ref;
ref = list_next(&rc->rc_list, ref)) {
if (ref->ref_holder == holder) {
mutex_exit(&rc->rc_mtx);
return (B_FALSE);
}
}
mutex_exit(&rc->rc_mtx);
return (B_TRUE);
}
#endif	/* ZFS_DEBUG */
@ -1332,7 +1332,6 @@ spa_unload(spa_t *spa)

	ddt_unload(spa);


	/*
	 * Drop and purge level 2 cache
	 */
@ -3720,6 +3719,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
	spa->spa_uberblock.ub_txg = txg - 1;
	spa->spa_uberblock.ub_version = version;
	spa->spa_ubsync = spa->spa_uberblock;
	spa->spa_load_state = SPA_LOAD_CREATE;

	/*
	 * Create "The Godfather" zio to hold all async IOs
@ -3905,6 +3905,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
	 */
	spa_evicting_os_wait(spa);
	spa->spa_minref = refcount_count(&spa->spa_refcount);
	spa->spa_load_state = SPA_LOAD_NONE;

	mutex_exit(&spa_namespace_lock);

@ -5615,7 +5616,7 @@ spa_nvlist_lookup_by_guid(nvlist_t **nvpp, int count, uint64_t target_guid)

static void
spa_vdev_remove_aux(nvlist_t *config, char *name, nvlist_t **dev, int count,
    nvlist_t *dev_to_remove)
    nvlist_t *dev_to_remove)
{
	nvlist_t **newdev = NULL;

@ -6830,6 +6831,8 @@ spa_sync(spa_t *spa, uint64_t txg)
	vdev_t *vd;
	dmu_tx_t *tx;
	int error;
	uint32_t max_queue_depth = zfs_vdev_async_write_max_active *
	    zfs_vdev_queue_depth_pct / 100;

	VERIFY(spa_writeable(spa));

@ -6841,6 +6844,10 @@ spa_sync(spa_t *spa, uint64_t txg)
	spa->spa_syncing_txg = txg;
	spa->spa_sync_pass = 0;

	mutex_enter(&spa->spa_alloc_lock);
	VERIFY0(avl_numnodes(&spa->spa_alloc_tree));
	mutex_exit(&spa->spa_alloc_lock);

	/*
	 * If there are any pending vdev state changes, convert them
	 * into config changes that go out with this transaction group.
@ -6899,6 +6906,38 @@ spa_sync(spa_t *spa, uint64_t txg)
		}
	}

	/*
	 * Set the top-level vdev's max queue depth. Evaluate each
	 * top-level's async write queue depth in case it changed.
	 * The max queue depth will not change in the middle of syncing
	 * out this txg.
	 */
	uint64_t queue_depth_total = 0;
	for (int c = 0; c < rvd->vdev_children; c++) {
		vdev_t *tvd = rvd->vdev_child[c];
		metaslab_group_t *mg = tvd->vdev_mg;

		if (mg == NULL || mg->mg_class != spa_normal_class(spa) ||
		    !metaslab_group_initialized(mg))
			continue;

		/*
		 * It is safe to do a lock-free check here because only async
		 * allocations look at mg_max_alloc_queue_depth, and async
		 * allocations all happen from spa_sync().
		 */
		ASSERT0(refcount_count(&mg->mg_alloc_queue_depth));
		mg->mg_max_alloc_queue_depth = max_queue_depth;
		queue_depth_total += mg->mg_max_alloc_queue_depth;
	}
	metaslab_class_t *mc = spa_normal_class(spa);
	ASSERT0(refcount_count(&mc->mc_alloc_slots));
	mc->mc_alloc_max_slots = queue_depth_total;
	mc->mc_alloc_throttle_enabled = zio_dva_throttle_enabled;

	ASSERT3U(mc->mc_alloc_max_slots, <=,
	    max_queue_depth * rvd->vdev_children);

	/*
	 * Iterate to convergence.
	 */
@ -7056,6 +7095,10 @@ spa_sync(spa_t *spa, uint64_t txg)

	dsl_pool_sync_done(dp, txg);

	mutex_enter(&spa->spa_alloc_lock);
	VERIFY0(avl_numnodes(&spa->spa_alloc_tree));
	mutex_exit(&spa->spa_alloc_lock);

	/*
	 * Update usable space statistics.
	 */

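/*
 * Worked example (illustration only, not part of the diff): with the
 * default values assumed below, the spa_sync() logic above sizes the
 * allocation throttle as follows.  The defaults are taken from the
 * vdev_queue.c comment later in this change; the vdev count is
 * hypothetical.
 */
static void
example_throttle_sizing(void)
{
	uint32_t async_write_max_active = 10;	/* assumed zfs_vdev_async_write_max_active */
	int queue_depth_pct = 1000;		/* zfs_vdev_queue_depth_pct (kernel default) */
	uint64_t children = 4;			/* hypothetical top-level vdev count */

	/* Per top-level vdev: 10 * 1000 / 100 = 100 queued allocations. */
	uint32_t max_queue_depth = async_write_max_active *
	    queue_depth_pct / 100;

	/* Class-wide slot limit: 100 * 4 top-level vdevs = 400 slots. */
	uint64_t alloc_max_slots = (uint64_t)max_queue_depth * children;

	(void) alloc_max_slots;
}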
@ -657,6 +657,7 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
	mutex_init(&spa->spa_scrub_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_suspend_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_vdev_top_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&spa->spa_alloc_lock, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&spa->spa_async_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&spa->spa_evicting_os_cv, NULL, CV_DEFAULT, NULL);
@ -713,6 +714,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
		spa_active_count++;
	}

	avl_create(&spa->spa_alloc_tree, zio_timestamp_compare,
	    sizeof (zio_t), offsetof(zio_t, io_alloc_node));

	/*
	 * Every pool starts with the default cachefile
	 */
@ -791,6 +795,7 @@ spa_remove(spa_t *spa)
		kmem_free(dp, sizeof (spa_config_dirent_t));
	}

	avl_destroy(&spa->spa_alloc_tree);
	list_destroy(&spa->spa_config_list);

	nvlist_free(spa->spa_label_features);
@ -824,6 +829,7 @@ spa_remove(spa_t *spa)
	cv_destroy(&spa->spa_scrub_io_cv);
	cv_destroy(&spa->spa_suspend_cv);

	mutex_destroy(&spa->spa_alloc_lock);
	mutex_destroy(&spa->spa_async_lock);
	mutex_destroy(&spa->spa_errlist_lock);
	mutex_destroy(&spa->spa_errlog_lock);

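/*
 * Illustrative sketch (an assumption, not the actual implementation): the
 * spa_alloc_tree created above orders throttled zios by the time they were
 * queued.  A comparator usable with avl_create() would look roughly like
 * this; ties are broken on the pointers so equal timestamps still produce
 * a total order.
 */
static int
example_zio_timestamp_compare(const void *x1, const void *x2)
{
	const zio_t *z1 = x1;
	const zio_t *z2 = x2;

	if (z1->io_queued_timestamp < z2->io_queued_timestamp)
		return (-1);
	if (z1->io_queued_timestamp > z2->io_queued_timestamp)
		return (1);
	/* Fall back to the pointers themselves to break ties. */
	if (z1 < z2)
		return (-1);
	if (z1 > z2)
		return (1);
	return (0);
}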
@ -43,51 +43,83 @@ extern "C" {
|
||||
*/
|
||||
#define ARC_EVICT_ALL -1ULL
|
||||
|
||||
#define HDR_SET_LSIZE(hdr, x) do { \
|
||||
ASSERT(IS_P2ALIGNED(x, 1U << SPA_MINBLOCKSHIFT)); \
|
||||
(hdr)->b_lsize = ((x) >> SPA_MINBLOCKSHIFT); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define HDR_SET_PSIZE(hdr, x) do { \
|
||||
ASSERT(IS_P2ALIGNED((x), 1U << SPA_MINBLOCKSHIFT)); \
|
||||
(hdr)->b_psize = ((x) >> SPA_MINBLOCKSHIFT); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define HDR_GET_LSIZE(hdr) ((hdr)->b_lsize << SPA_MINBLOCKSHIFT)
|
||||
#define HDR_GET_PSIZE(hdr) ((hdr)->b_psize << SPA_MINBLOCKSHIFT)
|
||||
|
||||
typedef struct arc_buf_hdr arc_buf_hdr_t;
|
||||
typedef struct arc_buf arc_buf_t;
|
||||
typedef void arc_done_func_t(zio_t *zio, arc_buf_t *buf, void *priv);
|
||||
typedef int arc_evict_func_t(void *priv);
|
||||
|
||||
/* generic arc_done_func_t's which you can use */
|
||||
arc_done_func_t arc_bcopy_func;
|
||||
arc_done_func_t arc_getbuf_func;
|
||||
|
||||
extern int zfs_arc_num_sublists_per_state;
|
||||
|
||||
typedef enum arc_flags
|
||||
{
|
||||
/*
|
||||
* Public flags that can be passed into the ARC by external consumers.
|
||||
*/
|
||||
ARC_FLAG_NONE = 1 << 0, /* No flags set */
|
||||
ARC_FLAG_WAIT = 1 << 1, /* perform sync I/O */
|
||||
ARC_FLAG_NOWAIT = 1 << 2, /* perform async I/O */
|
||||
ARC_FLAG_PREFETCH = 1 << 3, /* I/O is a prefetch */
|
||||
ARC_FLAG_CACHED = 1 << 4, /* I/O was in cache */
|
||||
ARC_FLAG_L2CACHE = 1 << 5, /* cache in L2ARC */
|
||||
ARC_FLAG_L2COMPRESS = 1 << 6, /* compress in L2ARC */
|
||||
ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 7, /* I/O from zfetch */
|
||||
ARC_FLAG_WAIT = 1 << 0, /* perform sync I/O */
|
||||
ARC_FLAG_NOWAIT = 1 << 1, /* perform async I/O */
|
||||
ARC_FLAG_PREFETCH = 1 << 2, /* I/O is a prefetch */
|
||||
ARC_FLAG_CACHED = 1 << 3, /* I/O was in cache */
|
||||
ARC_FLAG_L2CACHE = 1 << 4, /* cache in L2ARC */
|
||||
ARC_FLAG_PREDICTIVE_PREFETCH = 1 << 5, /* I/O from zfetch */
|
||||
|
||||
/*
|
||||
* Private ARC flags. These flags are private ARC only flags that
|
||||
* will show up in b_flags in the arc_hdr_buf_t. These flags should
|
||||
* only be set by ARC code.
|
||||
*/
|
||||
ARC_FLAG_IN_HASH_TABLE = 1 << 8, /* buffer is hashed */
|
||||
ARC_FLAG_IO_IN_PROGRESS = 1 << 9, /* I/O in progress */
|
||||
ARC_FLAG_IO_ERROR = 1 << 10, /* I/O failed for buf */
|
||||
ARC_FLAG_FREED_IN_READ = 1 << 11, /* freed during read */
|
||||
ARC_FLAG_BUF_AVAILABLE = 1 << 12, /* block not in use */
|
||||
ARC_FLAG_INDIRECT = 1 << 13, /* indirect block */
|
||||
ARC_FLAG_IN_HASH_TABLE = 1 << 6, /* buffer is hashed */
|
||||
ARC_FLAG_IO_IN_PROGRESS = 1 << 7, /* I/O in progress */
|
||||
ARC_FLAG_IO_ERROR = 1 << 8, /* I/O failed for buf */
|
||||
ARC_FLAG_INDIRECT = 1 << 9, /* indirect block */
|
||||
/* Indicates that block was read with ASYNC priority. */
|
||||
ARC_FLAG_PRIO_ASYNC_READ = 1 << 14,
|
||||
ARC_FLAG_L2_WRITING = 1 << 15, /* write in progress */
|
||||
ARC_FLAG_L2_EVICTED = 1 << 16, /* evicted during I/O */
|
||||
ARC_FLAG_L2_WRITE_HEAD = 1 << 17, /* head of write list */
|
||||
ARC_FLAG_PRIO_ASYNC_READ = 1 << 10,
|
||||
ARC_FLAG_L2_WRITING = 1 << 11, /* write in progress */
|
||||
ARC_FLAG_L2_EVICTED = 1 << 12, /* evicted during I/O */
|
||||
ARC_FLAG_L2_WRITE_HEAD = 1 << 13, /* head of write list */
|
||||
/* indicates that the buffer contains metadata (otherwise, data) */
|
||||
ARC_FLAG_BUFC_METADATA = 1 << 18,
|
||||
ARC_FLAG_BUFC_METADATA = 1 << 14,
|
||||
|
||||
/* Flags specifying whether optional hdr struct fields are defined */
|
||||
ARC_FLAG_HAS_L1HDR = 1 << 19,
|
||||
ARC_FLAG_HAS_L2HDR = 1 << 20,
|
||||
ARC_FLAG_HAS_L1HDR = 1 << 15,
|
||||
ARC_FLAG_HAS_L2HDR = 1 << 16,
|
||||
|
||||
/*
|
||||
* Indicates the arc_buf_hdr_t's b_pdata matches the on-disk data.
|
||||
* This allows the l2arc to use the blkptr's checksum to verify
|
||||
* the data without having to store the checksum in the hdr.
|
||||
*/
|
||||
ARC_FLAG_COMPRESSED_ARC = 1 << 17,
|
||||
ARC_FLAG_SHARED_DATA = 1 << 18,
|
||||
|
||||
/*
|
||||
* The arc buffer's compression mode is stored in the top 7 bits of the
|
||||
* flags field, so these dummy flags are included so that MDB can
|
||||
* interpret the enum properly.
|
||||
*/
|
||||
ARC_FLAG_COMPRESS_0 = 1 << 24,
|
||||
ARC_FLAG_COMPRESS_1 = 1 << 25,
|
||||
ARC_FLAG_COMPRESS_2 = 1 << 26,
|
||||
ARC_FLAG_COMPRESS_3 = 1 << 27,
|
||||
ARC_FLAG_COMPRESS_4 = 1 << 28,
|
||||
ARC_FLAG_COMPRESS_5 = 1 << 29,
|
||||
ARC_FLAG_COMPRESS_6 = 1 << 30
|
||||
|
||||
} arc_flags_t;
|
||||
|
||||
struct arc_buf {
|
||||
@ -95,11 +127,10 @@ struct arc_buf {
|
||||
arc_buf_t *b_next;
|
||||
kmutex_t b_evict_lock;
|
||||
void *b_data;
|
||||
arc_evict_func_t *b_efunc;
|
||||
void *b_private;
|
||||
};
|
||||
|
||||
typedef enum arc_buf_contents {
|
||||
ARC_BUFC_INVALID, /* invalid type */
|
||||
ARC_BUFC_DATA, /* buffer contains data */
|
||||
ARC_BUFC_METADATA, /* buffer contains metadata */
|
||||
ARC_BUFC_NUMTYPES
|
||||
@ -119,19 +150,17 @@ typedef enum arc_space_type {
|
||||
|
||||
void arc_space_consume(uint64_t space, arc_space_type_t type);
|
||||
void arc_space_return(uint64_t space, arc_space_type_t type);
|
||||
arc_buf_t *arc_buf_alloc(spa_t *spa, int size, void *tag,
|
||||
arc_buf_t *arc_alloc_buf(spa_t *spa, int32_t size, void *tag,
|
||||
arc_buf_contents_t type);
|
||||
arc_buf_t *arc_loan_buf(spa_t *spa, int size);
|
||||
void arc_return_buf(arc_buf_t *buf, void *tag);
|
||||
void arc_loan_inuse_buf(arc_buf_t *buf, void *tag);
|
||||
void arc_buf_add_ref(arc_buf_t *buf, void *tag);
|
||||
boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag);
|
||||
void arc_buf_destroy(arc_buf_t *buf, void *tag);
|
||||
int arc_buf_size(arc_buf_t *buf);
|
||||
void arc_release(arc_buf_t *buf, void *tag);
|
||||
int arc_released(arc_buf_t *buf);
|
||||
void arc_buf_freeze(arc_buf_t *buf);
|
||||
void arc_buf_thaw(arc_buf_t *buf);
|
||||
boolean_t arc_buf_eviction_needed(arc_buf_t *buf);
|
||||
#ifdef ZFS_DEBUG
|
||||
int arc_referenced(arc_buf_t *buf);
|
||||
#endif
|
||||
@ -140,21 +169,18 @@ int arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
|
||||
arc_done_func_t *done, void *priv, zio_priority_t priority, int flags,
|
||||
arc_flags_t *arc_flags, const zbookmark_phys_t *zb);
|
||||
zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg,
|
||||
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, boolean_t l2arc_compress,
|
||||
const zio_prop_t *zp,
|
||||
blkptr_t *bp, arc_buf_t *buf, boolean_t l2arc, const zio_prop_t *zp,
|
||||
arc_done_func_t *ready, arc_done_func_t *child_ready,
|
||||
arc_done_func_t *physdone, arc_done_func_t *done,
|
||||
void *priv, zio_priority_t priority, int zio_flags,
|
||||
const zbookmark_phys_t *zb);
|
||||
void arc_freed(spa_t *spa, const blkptr_t *bp);
|
||||
|
||||
void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *priv);
|
||||
boolean_t arc_clear_callback(arc_buf_t *buf);
|
||||
|
||||
void arc_flush(spa_t *spa, boolean_t retry);
|
||||
void arc_tempreserve_clear(uint64_t reserve);
|
||||
int arc_tempreserve_space(uint64_t reserve, uint64_t txg);
|
||||
|
||||
uint64_t arc_max_bytes(void);
|
||||
void arc_init(void);
|
||||
void arc_fini(void);
|
||||
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/zrlock.h>
|
||||
#include <sys/multilist.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
@ -228,6 +229,11 @@ typedef struct dmu_buf_impl {
|
||||
*/
|
||||
avl_node_t db_link;
|
||||
|
||||
/*
|
||||
* Link in dbuf_cache.
|
||||
*/
|
||||
multilist_node_t db_cache_link;
|
||||
|
||||
/* Data which is unique to data (leaf) blocks: */
|
||||
|
||||
/* User callback information. */
|
||||
@ -305,8 +311,7 @@ void dmu_buf_write_embedded(dmu_buf_t *dbuf, void *data,
|
||||
bp_embedded_type_t etype, enum zio_compress comp,
|
||||
int uncompressed_size, int compressed_size, int byteorder, dmu_tx_t *tx);
|
||||
|
||||
void dbuf_clear(dmu_buf_impl_t *db);
|
||||
void dbuf_evict(dmu_buf_impl_t *db);
|
||||
void dbuf_destroy(dmu_buf_impl_t *db);
|
||||
|
||||
void dbuf_setdirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
|
||||
void dbuf_unoverride(dbuf_dirty_record_t *dr);
|
||||
@ -342,10 +347,6 @@ boolean_t dbuf_is_metadata(dmu_buf_impl_t *db);
|
||||
(dbuf_is_metadata(_db) && \
|
||||
((_db)->db_objset->os_secondary_cache == ZFS_CACHE_METADATA)))
|
||||
|
||||
#define DBUF_IS_L2COMPRESSIBLE(_db) \
|
||||
((_db)->db_objset->os_compress != ZIO_COMPRESS_OFF || \
|
||||
(dbuf_is_metadata(_db) && zfs_mdcomp_disable == B_FALSE))
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
|
||||
/*
|
||||
|
@ -21,7 +21,7 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
|
||||
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
|
||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
||||
* Copyright 2013 DEY Storage Systems, Inc.
|
||||
@ -78,6 +78,7 @@ struct file;
|
||||
typedef struct objset objset_t;
|
||||
typedef struct dmu_tx dmu_tx_t;
|
||||
typedef struct dsl_dir dsl_dir_t;
|
||||
typedef struct dnode dnode_t;
|
||||
|
||||
typedef enum dmu_object_byteswap {
|
||||
DMU_BSWAP_UINT8,
|
||||
@ -419,7 +420,7 @@ dmu_write_embedded(objset_t *os, uint64_t object, uint64_t offset,
|
||||
#define WP_DMU_SYNC 0x2
|
||||
#define WP_SPILL 0x4
|
||||
|
||||
void dmu_write_policy(objset_t *os, struct dnode *dn, int level, int wp,
|
||||
void dmu_write_policy(objset_t *os, dnode_t *dn, int level, int wp,
|
||||
struct zio_prop *zp);
|
||||
/*
|
||||
* The bonus data is accessed more or less like a regular buffer.
|
||||
@ -445,7 +446,7 @@ int dmu_rm_spill(objset_t *, uint64_t, dmu_tx_t *);
|
||||
*/
|
||||
|
||||
int dmu_spill_hold_by_bonus(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
int dmu_spill_hold_by_dnode(struct dnode *dn, uint32_t flags,
|
||||
int dmu_spill_hold_by_dnode(dnode_t *dn, uint32_t flags,
|
||||
void *tag, dmu_buf_t **dbp);
|
||||
int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
|
||||
@ -465,6 +466,8 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp);
|
||||
*/
|
||||
int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset,
|
||||
void *tag, dmu_buf_t **, int flags);
|
||||
int dmu_buf_hold_by_dnode(dnode_t *dn, uint64_t offset,
|
||||
void *tag, dmu_buf_t **dbp, int flags);
|
||||
|
||||
/*
|
||||
* Add a reference to a dmu buffer that has already been held via
|
||||
@ -617,6 +620,10 @@ void *dmu_buf_remove_user(dmu_buf_t *db, dmu_buf_user_t *user);
|
||||
*/
|
||||
void *dmu_buf_get_user(dmu_buf_t *db);
|
||||
|
||||
objset_t *dmu_buf_get_objset(dmu_buf_t *db);
|
||||
dnode_t *dmu_buf_dnode_enter(dmu_buf_t *db);
|
||||
void dmu_buf_dnode_exit(dmu_buf_t *db);
|
||||
|
||||
/* Block until any in-progress dmu buf user evictions complete. */
|
||||
void dmu_buf_user_evict_wait(void);
|
||||
|
||||
@ -799,7 +806,7 @@ extern const dmu_object_byteswap_info_t dmu_ot_byteswap[DMU_BSWAP_NUMFUNCS];
|
||||
*/
|
||||
int dmu_object_info(objset_t *os, uint64_t object, dmu_object_info_t *doi);
|
||||
/* Like dmu_object_info, but faster if you have a held dnode in hand. */
|
||||
void dmu_object_info_from_dnode(struct dnode *dn, dmu_object_info_t *doi);
|
||||
void dmu_object_info_from_dnode(dnode_t *dn, dmu_object_info_t *doi);
|
||||
/* Like dmu_object_info, but faster if you have a held dbuf in hand. */
|
||||
void dmu_object_info_from_db(dmu_buf_t *db, dmu_object_info_t *doi);
|
||||
/*
|
||||
|
@ -301,6 +301,8 @@ typedef struct dmu_sendarg {
|
||||
uint64_t dsa_last_data_offset;
|
||||
uint64_t dsa_resume_object;
|
||||
uint64_t dsa_resume_offset;
|
||||
boolean_t dsa_sent_begin;
|
||||
boolean_t dsa_sent_end;
|
||||
} dmu_sendarg_t;
|
||||
|
||||
void dmu_object_zapify(objset_t *, uint64_t, dmu_object_type_t, dmu_tx_t *);
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
@ -149,7 +149,7 @@ typedef struct dnode_phys {
|
||||
blkptr_t dn_spill;
|
||||
} dnode_phys_t;
|
||||
|
||||
typedef struct dnode {
|
||||
struct dnode {
|
||||
/*
|
||||
* Protects the structure of the dnode, including the number of levels
|
||||
* of indirection (dn_nlevels), dn_maxblkid, and dn_next_*
|
||||
@ -247,7 +247,7 @@ typedef struct dnode {
|
||||
|
||||
/* holds prefetch structure */
|
||||
struct zfetch dn_zfetch;
|
||||
} dnode_t;
|
||||
};
|
||||
|
||||
/*
|
||||
* Adds a level of indirection between the dbuf and the dnode to avoid
|
||||
|
@ -272,7 +272,6 @@ int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
|
||||
minor_t cleanup_minor, const char *htag);
|
||||
|
||||
blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);
|
||||
void dsl_dataset_set_blkptr(dsl_dataset_t *ds, blkptr_t *bp, dmu_tx_t *tx);
|
||||
|
||||
spa_t *dsl_dataset_get_spa(dsl_dataset_t *ds);
|
||||
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_H
|
||||
@ -55,14 +55,15 @@ void metaslab_sync_done(metaslab_t *, uint64_t);
|
||||
void metaslab_sync_reassess(metaslab_group_t *);
|
||||
uint64_t metaslab_block_maxsize(metaslab_t *);
|
||||
|
||||
#define METASLAB_HINTBP_FAVOR 0x0
|
||||
#define METASLAB_HINTBP_AVOID 0x1
|
||||
#define METASLAB_GANG_HEADER 0x2
|
||||
#define METASLAB_GANG_CHILD 0x4
|
||||
#define METASLAB_GANG_AVOID 0x8
|
||||
#define METASLAB_HINTBP_FAVOR 0x0
|
||||
#define METASLAB_HINTBP_AVOID 0x1
|
||||
#define METASLAB_GANG_HEADER 0x2
|
||||
#define METASLAB_GANG_CHILD 0x4
|
||||
#define METASLAB_ASYNC_ALLOC 0x8
|
||||
#define METASLAB_DONT_THROTTLE 0x10
|
||||
|
||||
int metaslab_alloc(spa_t *, metaslab_class_t *, uint64_t,
|
||||
blkptr_t *, int, uint64_t, blkptr_t *, int);
|
||||
blkptr_t *, int, uint64_t, blkptr_t *, int, zio_t *);
|
||||
void metaslab_free(spa_t *, const blkptr_t *, uint64_t, boolean_t);
|
||||
int metaslab_claim(spa_t *, const blkptr_t *, uint64_t);
|
||||
void metaslab_check_free(spa_t *, const blkptr_t *);
|
||||
@ -73,6 +74,9 @@ int metaslab_class_validate(metaslab_class_t *);
|
||||
void metaslab_class_histogram_verify(metaslab_class_t *);
|
||||
uint64_t metaslab_class_fragmentation(metaslab_class_t *);
|
||||
uint64_t metaslab_class_expandable_space(metaslab_class_t *);
|
||||
boolean_t metaslab_class_throttle_reserve(metaslab_class_t *, int,
|
||||
zio_t *, int);
|
||||
void metaslab_class_throttle_unreserve(metaslab_class_t *, int, zio_t *);
|
||||
|
||||
void metaslab_class_space_update(metaslab_class_t *, int64_t, int64_t,
|
||||
int64_t, int64_t);
|
||||
@ -86,10 +90,13 @@ metaslab_group_t *metaslab_group_create(metaslab_class_t *, vdev_t *);
|
||||
void metaslab_group_destroy(metaslab_group_t *);
|
||||
void metaslab_group_activate(metaslab_group_t *);
|
||||
void metaslab_group_passivate(metaslab_group_t *);
|
||||
boolean_t metaslab_group_initialized(metaslab_group_t *);
|
||||
uint64_t metaslab_group_get_space(metaslab_group_t *);
|
||||
void metaslab_group_histogram_verify(metaslab_group_t *);
|
||||
uint64_t metaslab_group_fragmentation(metaslab_group_t *);
|
||||
void metaslab_group_histogram_remove(metaslab_group_t *, metaslab_t *);
|
||||
void metaslab_group_alloc_decrement(spa_t *, uint64_t, void *, int);
|
||||
void metaslab_group_alloc_verify(spa_t *, const blkptr_t *, void *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -24,7 +24,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
@ -59,11 +59,42 @@ extern "C" {
|
||||
* to use a block allocator that best suits that class.
|
||||
*/
|
||||
struct metaslab_class {
|
||||
kmutex_t mc_lock;
|
||||
spa_t *mc_spa;
|
||||
metaslab_group_t *mc_rotor;
|
||||
metaslab_ops_t *mc_ops;
|
||||
uint64_t mc_aliquot;
|
||||
|
||||
/*
|
||||
 * Track the number of metaslab groups that have been initialized
 * and can accept allocations. An initialized metaslab group is
 * one that has been completely added to the config (i.e. we have
 * updated the MOS config and the space has been added to the pool).
 */
uint64_t mc_groups;
|
||||
|
||||
/*
|
||||
* Toggle to enable/disable the allocation throttle.
|
||||
*/
|
||||
boolean_t mc_alloc_throttle_enabled;
|
||||
|
||||
/*
|
||||
 * The allocation throttle works on a reservation system. Whenever
 * an asynchronous zio wants to perform an allocation it must
 * first reserve the number of blocks that it wants to allocate.
 * If there aren't sufficient slots available for the pending zio
 * then that I/O is throttled until more slots free up. The current
 * number of reserved allocations is maintained by the mc_alloc_slots
 * refcount. The mc_alloc_max_slots value determines the maximum
 * number of allocations that the system allows. Gang blocks are
 * allowed to reserve slots even if we've reached the maximum
 * number of allocations allowed.
 */
uint64_t mc_alloc_max_slots;
refcount_t mc_alloc_slots;
|
||||
|
||||
uint64_t mc_alloc_groups; /* # of allocatable groups */
|
||||
|
||||
uint64_t mc_alloc; /* total allocated space */
|
||||
uint64_t mc_deferred; /* total deferred frees */
|
||||
uint64_t mc_space; /* total space (alloc + free) */
|
||||
@ -86,6 +117,15 @@ struct metaslab_group {
|
||||
avl_tree_t mg_metaslab_tree;
|
||||
uint64_t mg_aliquot;
|
||||
boolean_t mg_allocatable; /* can we allocate? */
|
||||
|
||||
/*
|
||||
* A metaslab group is considered to be initialized only after
|
||||
* we have updated the MOS config and added the space to the pool.
|
||||
* We only allow allocation attempts to a metaslab group if it
|
||||
* has been initialized.
|
||||
*/
|
||||
boolean_t mg_initialized;
|
||||
|
||||
uint64_t mg_free_capacity; /* percentage free */
|
||||
int64_t mg_bias;
|
||||
int64_t mg_activation_count;
|
||||
@ -94,6 +134,27 @@ struct metaslab_group {
|
||||
taskq_t *mg_taskq;
|
||||
metaslab_group_t *mg_prev;
|
||||
metaslab_group_t *mg_next;
|
||||
|
||||
/*
|
||||
 * Each metaslab group can handle mg_max_alloc_queue_depth allocations
 * which are tracked by mg_alloc_queue_depth. It's possible for a
 * metaslab group to handle more allocations than its max. This
 * can occur when gang blocks are required or when other groups
 * are unable to handle their share of allocations.
 */
uint64_t mg_max_alloc_queue_depth;
refcount_t mg_alloc_queue_depth;

/*
 * A metaslab group that can no longer allocate the minimum block
 * size will set mg_no_free_space. Once a metaslab group is out
 * of space then its share of work must be distributed to other
 * groups.
 */
boolean_t mg_no_free_space;
|
||||
|
||||
uint64_t mg_allocations;
|
||||
uint64_t mg_failed_allocations;
|
||||
uint64_t mg_fragmentation;
|
||||
uint64_t mg_histogram[RANGE_TREE_HISTOGRAM_SIZE];
|
||||
};
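/*
 * Illustrative sketch (an assumption, not part of this change): the
 * throttle fields above are meant to be manipulated through the
 * metaslab_class_throttle_reserve()/_unreserve() interfaces declared in
 * metaslab.h, roughly along these lines.  The function name and the flag
 * value 0 are hypothetical.
 */
static boolean_t
example_reserve_slot(metaslab_class_t *mc, zio_t *zio)
{
	/* Ask for one allocation slot for this zio. */
	if (!metaslab_class_throttle_reserve(mc, 1, zio, 0))
		return (B_FALSE);	/* throttled: caller must wait */

	/* ... perform the allocation ... */

	metaslab_class_throttle_unreserve(mc, 1, zio);
	return (B_TRUE);
}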
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_REFCOUNT_H
|
||||
@ -64,6 +64,7 @@ typedef struct refcount {
|
||||
|
||||
void refcount_create(refcount_t *rc);
|
||||
void refcount_create_untracked(refcount_t *rc);
|
||||
void refcount_create_tracked(refcount_t *rc);
|
||||
void refcount_destroy(refcount_t *rc);
|
||||
void refcount_destroy_many(refcount_t *rc, uint64_t number);
|
||||
int refcount_is_zero(refcount_t *rc);
|
||||
@ -73,6 +74,9 @@ int64_t refcount_remove(refcount_t *rc, void *holder_tag);
|
||||
int64_t refcount_add_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
int64_t refcount_remove_many(refcount_t *rc, uint64_t number, void *holder_tag);
|
||||
void refcount_transfer(refcount_t *dst, refcount_t *src);
|
||||
void refcount_transfer_ownership(refcount_t *, void *, void *);
|
||||
boolean_t refcount_held(refcount_t *, void *);
|
||||
boolean_t refcount_not_held(refcount_t *, void *);
|
||||
|
||||
void refcount_sysinit(void);
|
||||
void refcount_fini(void);
|
||||
@ -85,6 +89,7 @@ typedef struct refcount {
|
||||
|
||||
#define refcount_create(rc) ((rc)->rc_count = 0)
|
||||
#define refcount_create_untracked(rc) ((rc)->rc_count = 0)
|
||||
#define refcount_create_tracked(rc) ((rc)->rc_count = 0)
|
||||
#define refcount_destroy(rc) ((rc)->rc_count = 0)
|
||||
#define refcount_destroy_many(rc, number) ((rc)->rc_count = 0)
|
||||
#define refcount_is_zero(rc) ((rc)->rc_count == 0)
|
||||
@ -100,6 +105,9 @@ typedef struct refcount {
|
||||
atomic_add_64(&(src)->rc_count, -__tmp); \
|
||||
atomic_add_64(&(dst)->rc_count, __tmp); \
|
||||
}
|
||||
#define refcount_transfer_ownership(rc, current_holder, new_holder)
|
||||
#define refcount_held(rc, holder) ((rc)->rc_count > 0)
|
||||
#define refcount_not_held(rc, holder) (B_TRUE)
|
||||
|
||||
#define refcount_sysinit()
|
||||
#define refcount_fini()
|
||||
|
@ -149,6 +149,8 @@ _NOTE(CONSTCOND) } while (0)
|
||||
#define SPA_PSIZEBITS 16 /* PSIZE up to 32M (2^16 * 512) */
|
||||
#define SPA_ASIZEBITS 24 /* ASIZE up to 64 times larger */
|
||||
|
||||
#define SPA_COMPRESSBITS 7
|
||||
|
||||
/*
|
||||
* All SPA data is represented by 128-bit data virtual addresses (DVAs).
|
||||
* The members of the dva_t should be considered opaque outside the SPA.
|
||||
@ -391,8 +393,10 @@ _NOTE(CONSTCOND) } while (0)
|
||||
16, SPA_PSIZEBITS, SPA_MINBLOCKSHIFT, 1, x); \
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
#define BP_GET_COMPRESS(bp) BF64_GET((bp)->blk_prop, 32, 7)
|
||||
#define BP_SET_COMPRESS(bp, x) BF64_SET((bp)->blk_prop, 32, 7, x)
|
||||
#define BP_GET_COMPRESS(bp) \
|
||||
BF64_GET((bp)->blk_prop, 32, SPA_COMPRESSBITS)
|
||||
#define BP_SET_COMPRESS(bp, x) \
|
||||
BF64_SET((bp)->blk_prop, 32, SPA_COMPRESSBITS, x)
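/*
 * Illustrative note (not part of the diff): SPA_COMPRESSBITS is 7, so the
 * compression field still holds values 0 through 127; the macros above only
 * replace the hard-coded width with the named constant.  A hypothetical
 * round-trip looks like:
 */
static void
example_bp_compress_roundtrip(blkptr_t *bp)
{
	BP_SET_COMPRESS(bp, ZIO_COMPRESS_LZ4);
	ASSERT3U(BP_GET_COMPRESS(bp), ==, ZIO_COMPRESS_LZ4);
}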
|
||||
|
||||
#define BP_IS_EMBEDDED(bp) BF64_GET((bp)->blk_prop, 39, 1)
|
||||
#define BP_SET_EMBEDDED(bp, x) BF64_SET((bp)->blk_prop, 39, 1, x)
|
||||
|
@ -165,6 +165,8 @@ struct spa {
|
||||
uint64_t spa_last_synced_guid; /* last synced guid */
|
||||
list_t spa_config_dirty_list; /* vdevs with dirty config */
|
||||
list_t spa_state_dirty_list; /* vdevs with dirty state */
|
||||
kmutex_t spa_alloc_lock;
|
||||
avl_tree_t spa_alloc_tree;
|
||||
spa_aux_vdev_t spa_spares; /* hot spares */
|
||||
spa_aux_vdev_t spa_l2cache; /* L2ARC cache devices */
|
||||
nvlist_t *spa_label_features; /* Features for reading MOS */
|
||||
|
@ -53,6 +53,9 @@ typedef struct vdev_queue vdev_queue_t;
|
||||
typedef struct vdev_cache vdev_cache_t;
|
||||
typedef struct vdev_cache_entry vdev_cache_entry_t;
|
||||
|
||||
extern int zfs_vdev_queue_depth_pct;
|
||||
extern uint32_t zfs_vdev_async_write_max_active;
|
||||
|
||||
/*
|
||||
* Virtual device operations
|
||||
*/
|
||||
@ -190,9 +193,20 @@ struct vdev {
|
||||
uint64_t vdev_deflate_ratio; /* deflation ratio (x512) */
|
||||
uint64_t vdev_islog; /* is an intent log device */
|
||||
uint64_t vdev_removing; /* device is being removed? */
|
||||
boolean_t vdev_ishole; /* is a hole in the namespace */
|
||||
boolean_t vdev_ishole; /* is a hole in the namespace */
|
||||
kmutex_t vdev_queue_lock; /* protects vdev_queue_depth */
|
||||
uint64_t vdev_top_zap;
|
||||
|
||||
/*
|
||||
 * The queue depth parameters determine how many async writes are
 * still pending (i.e. allocated but not yet issued to disk) per
 * top-level (vdev_async_write_queue_depth) and the maximum allowed
 * (vdev_max_async_write_queue_depth). These values only apply to
 * top-level vdevs.
 */
uint64_t vdev_async_write_queue_depth;
uint64_t vdev_max_async_write_queue_depth;
|
||||
|
||||
/*
|
||||
* Leaf vdev state.
|
||||
*/
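/*
 * Illustrative sketch (an assumption, not part of this change): a consumer
 * comparing the two queue depth fields above to decide whether a top-level
 * vdev is currently saturated with async writes might do something like
 * the following, taking vdev_queue_lock since that lock is documented
 * above as protecting the queue depth.
 */
static boolean_t
example_vdev_async_writes_saturated(vdev_t *tvd)
{
	boolean_t saturated;

	mutex_enter(&tvd->vdev_queue_lock);
	saturated = tvd->vdev_async_write_queue_depth >=
	    tvd->vdev_max_async_write_queue_depth;
	mutex_exit(&tvd->vdev_queue_lock);

	return (saturated);
}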
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZAP_H
|
||||
@ -216,8 +216,14 @@ int zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int zap_contains(objset_t *ds, uint64_t zapobj, const char *name);
|
||||
int zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int key_numints);
|
||||
int zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf);
|
||||
int zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp);
|
||||
|
||||
int zap_count_write(objset_t *os, uint64_t zapobj, const char *name,
|
||||
int zap_count_write_by_dnode(dnode_t *dn, const char *name,
|
||||
int add, refcount_t *towrite, refcount_t *tooverwrite);
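/*
 * Illustrative sketch (hypothetical caller, not part of the diff): the new
 * *_by_dnode variants declared above mirror the object-number based calls
 * but operate on an already-held dnode, e.g.:
 */
static int
example_lookup_by_dnode(dnode_t *dn, uint64_t *valp)
{
	/* Look up a single 8-byte integer under a hypothetical name. */
	return (zap_lookup_by_dnode(dn, "example-key",
	    sizeof (uint64_t), 1, valp));
}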
|
||||
|
||||
/*
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright (c) 2014 Integros [integros.com]
|
||||
*/
|
||||
@ -197,8 +197,8 @@ typedef struct zap_name {
|
||||
|
||||
boolean_t zap_match(zap_name_t *zn, const char *matchname);
|
||||
int zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp);
|
||||
void zap_unlockdir(zap_t *zap);
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp);
|
||||
void zap_unlockdir(zap_t *zap, void *tag);
|
||||
void zap_evict(void *dbu);
|
||||
zap_name_t *zap_name_alloc(zap_t *zap, const char *key, matchtype_t mt);
|
||||
void zap_name_free(zap_name_t *zn);
|
||||
@ -217,9 +217,10 @@ void fzap_prefetch(zap_name_t *zn);
|
||||
int fzap_count_write(zap_name_t *zn, int add, refcount_t *towrite,
|
||||
refcount_t *tooverwrite);
|
||||
int fzap_add(zap_name_t *zn, uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx);
|
||||
const void *val, void *tag, dmu_tx_t *tx);
|
||||
int fzap_update(zap_name_t *zn,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx);
|
||||
int integer_size, uint64_t num_integers, const void *val,
|
||||
void *tag, dmu_tx_t *tx);
|
||||
int fzap_length(zap_name_t *zn,
|
||||
uint64_t *integer_size, uint64_t *num_integers);
|
||||
int fzap_remove(zap_name_t *zn, dmu_tx_t *tx);
|
||||
@ -229,7 +230,7 @@ void zap_put_leaf(struct zap_leaf *l);
|
||||
|
||||
int fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, dmu_tx_t *tx);
|
||||
const void *val, uint32_t cd, void *tag, dmu_tx_t *tx);
|
||||
void fzap_upgrade(zap_t *zap, dmu_tx_t *tx, zap_flags_t flags);
|
||||
int fzap_cursor_move_to_key(zap_cursor_t *zc, zap_name_t *zn);
|
||||
|
||||
|
@ -175,6 +175,7 @@ enum zio_flag {
|
||||
ZIO_FLAG_DONT_CACHE = 1 << 11,
|
||||
ZIO_FLAG_NODATA = 1 << 12,
|
||||
ZIO_FLAG_INDUCE_DAMAGE = 1 << 13,
|
||||
ZIO_FLAG_IO_ALLOCATING = 1 << 14,
|
||||
|
||||
#define ZIO_FLAG_DDT_INHERIT (ZIO_FLAG_IO_RETRY - 1)
|
||||
#define ZIO_FLAG_GANG_INHERIT (ZIO_FLAG_IO_RETRY - 1)
|
||||
@ -182,27 +183,27 @@ enum zio_flag {
|
||||
/*
|
||||
* Flags inherited by vdev children.
|
||||
*/
|
||||
ZIO_FLAG_IO_RETRY = 1 << 14, /* must be first for INHERIT */
|
||||
ZIO_FLAG_PROBE = 1 << 15,
|
||||
ZIO_FLAG_TRYHARD = 1 << 16,
|
||||
ZIO_FLAG_OPTIONAL = 1 << 17,
|
||||
ZIO_FLAG_IO_RETRY = 1 << 15, /* must be first for INHERIT */
|
||||
ZIO_FLAG_PROBE = 1 << 16,
|
||||
ZIO_FLAG_TRYHARD = 1 << 17,
|
||||
ZIO_FLAG_OPTIONAL = 1 << 18,
|
||||
|
||||
#define ZIO_FLAG_VDEV_INHERIT (ZIO_FLAG_DONT_QUEUE - 1)
|
||||
|
||||
/*
|
||||
* Flags not inherited by any children.
|
||||
*/
|
||||
ZIO_FLAG_DONT_QUEUE = 1 << 18, /* must be first for INHERIT */
|
||||
ZIO_FLAG_DONT_PROPAGATE = 1 << 19,
|
||||
ZIO_FLAG_IO_BYPASS = 1 << 20,
|
||||
ZIO_FLAG_IO_REWRITE = 1 << 21,
|
||||
ZIO_FLAG_RAW = 1 << 22,
|
||||
ZIO_FLAG_GANG_CHILD = 1 << 23,
|
||||
ZIO_FLAG_DDT_CHILD = 1 << 24,
|
||||
ZIO_FLAG_GODFATHER = 1 << 25,
|
||||
ZIO_FLAG_NOPWRITE = 1 << 26,
|
||||
ZIO_FLAG_REEXECUTED = 1 << 27,
|
||||
ZIO_FLAG_DELEGATED = 1 << 28,
|
||||
ZIO_FLAG_DONT_QUEUE = 1 << 19, /* must be first for INHERIT */
|
||||
ZIO_FLAG_DONT_PROPAGATE = 1 << 20,
|
||||
ZIO_FLAG_IO_BYPASS = 1 << 21,
|
||||
ZIO_FLAG_IO_REWRITE = 1 << 22,
|
||||
ZIO_FLAG_RAW = 1 << 23,
|
||||
ZIO_FLAG_GANG_CHILD = 1 << 24,
|
||||
ZIO_FLAG_DDT_CHILD = 1 << 25,
|
||||
ZIO_FLAG_GODFATHER = 1 << 26,
|
||||
ZIO_FLAG_NOPWRITE = 1 << 27,
|
||||
ZIO_FLAG_REEXECUTED = 1 << 28,
|
||||
ZIO_FLAG_DELEGATED = 1 << 29,
|
||||
};
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0
|
||||
@ -243,6 +244,7 @@ enum zio_wait_type {
|
||||
|
||||
typedef void zio_done_func_t(zio_t *zio);
|
||||
|
||||
extern boolean_t zio_dva_throttle_enabled;
|
||||
extern const char *zio_type_name[ZIO_TYPES];
|
||||
|
||||
/*
|
||||
@ -430,7 +432,6 @@ struct zio {
|
||||
blkptr_t io_bp_copy;
|
||||
list_t io_parent_list;
|
||||
list_t io_child_list;
|
||||
zio_link_t *io_walk_link;
|
||||
zio_t *io_logical;
|
||||
zio_transform_t *io_transform_stack;
|
||||
|
||||
@ -456,9 +457,11 @@ struct zio {
|
||||
|
||||
uint64_t io_offset;
|
||||
hrtime_t io_timestamp;
|
||||
hrtime_t io_queued_timestamp;
|
||||
hrtime_t io_target_timestamp;
|
||||
avl_node_t io_queue_node;
|
||||
avl_node_t io_offset_node;
|
||||
avl_node_t io_alloc_node;
|
||||
|
||||
/* Internal pipeline state */
|
||||
enum zio_flag io_flags;
|
||||
@ -467,6 +470,7 @@ struct zio {
|
||||
enum zio_flag io_orig_flags;
|
||||
enum zio_stage io_orig_stage;
|
||||
enum zio_stage io_orig_pipeline;
|
||||
enum zio_stage io_pipeline_trace;
|
||||
int io_error;
|
||||
int io_child_error[ZIO_CHILD_TYPES];
|
||||
uint64_t io_children[ZIO_CHILD_TYPES][ZIO_WAIT_TYPES];
|
||||
@ -492,6 +496,8 @@ struct zio {
|
||||
list_node_t io_trim_link;
|
||||
};
|
||||
|
||||
extern int zio_timestamp_compare(const void *, const void *);
|
||||
|
||||
extern zio_t *zio_null(zio_t *pio, spa_t *spa, vdev_t *vd,
|
||||
zio_done_func_t *done, void *priv, enum zio_flag flags);
|
||||
|
||||
@ -554,8 +560,8 @@ extern void zio_interrupt(zio_t *zio);
|
||||
extern void zio_delay_init(zio_t *zio);
|
||||
extern void zio_delay_interrupt(zio_t *zio);
|
||||
|
||||
extern zio_t *zio_walk_parents(zio_t *cio);
|
||||
extern zio_t *zio_walk_children(zio_t *pio);
|
||||
extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
|
||||
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
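/*
 * Illustrative sketch (not part of the diff): the new zio_link_t **
 * argument moves the walk cursor out of the zio (the io_walk_link field is
 * removed above) and into the caller.  Callers initialize the cursor to
 * NULL and pass its address on every call, as the updated callers
 * elsewhere in this change do:
 */
static void
example_walk_parents(zio_t *cio)
{
	zio_link_t *zl = NULL;
	zio_t *pio;

	while ((pio = zio_walk_parents(cio, &zl)) != NULL) {
		/* ... visit each parent zio ... */
	}
}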
|
||||
extern zio_t *zio_unique_parent(zio_t *cio);
|
||||
extern void zio_add_child(zio_t *pio, zio_t *cio);
|
||||
|
||||
@ -564,6 +570,10 @@ extern void zio_buf_free(void *buf, size_t size);
|
||||
extern void *zio_data_buf_alloc(size_t size);
|
||||
extern void zio_data_buf_free(void *buf, size_t size);
|
||||
|
||||
extern void zio_push_transform(zio_t *zio, void *data, uint64_t size,
|
||||
uint64_t bufsize, zio_transform_func_t *transform);
|
||||
extern void zio_pop_transforms(zio_t *zio);
|
||||
|
||||
extern void zio_resubmit_stage_async(void *);
|
||||
|
||||
extern zio_t *zio_vdev_child_io(zio_t *zio, blkptr_t *bp, vdev_t *vd,
|
||||
|
@ -99,8 +99,12 @@ extern zio_checksum_tmpl_init_t zio_checksum_edonr_tmpl_init;
|
||||
extern zio_checksum_tmpl_free_t zio_checksum_edonr_tmpl_free;
|
||||
#endif
|
||||
|
||||
extern int zio_checksum_equal(spa_t *, blkptr_t *, enum zio_checksum,
|
||||
void *, uint64_t, uint64_t, zio_bad_cksum_t *);
|
||||
extern void zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||
void *data, uint64_t size);
|
||||
extern int zio_checksum_error_impl(spa_t *, blkptr_t *, enum zio_checksum,
|
||||
void *, uint64_t, uint64_t, zio_bad_cksum_t *);
|
||||
extern int zio_checksum_error(zio_t *zio, zio_bad_cksum_t *out);
|
||||
extern enum zio_checksum spa_dedup_checksum(spa_t *spa);
|
||||
extern void zio_checksum_templates_free(spa_t *spa);
|
||||
|
@ -24,7 +24,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_IMPL_H
|
||||
@ -108,35 +108,37 @@ enum zio_stage {
|
||||
ZIO_STAGE_OPEN = 1 << 0, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_READ_BP_INIT = 1 << 1, /* R---- */
|
||||
ZIO_STAGE_FREE_BP_INIT = 1 << 2, /* --F-- */
|
||||
ZIO_STAGE_ISSUE_ASYNC = 1 << 3, /* RWF-- */
|
||||
ZIO_STAGE_WRITE_BP_INIT = 1 << 4, /* -W--- */
|
||||
ZIO_STAGE_WRITE_BP_INIT = 1 << 2, /* -W--- */
|
||||
ZIO_STAGE_FREE_BP_INIT = 1 << 3, /* --F-- */
|
||||
ZIO_STAGE_ISSUE_ASYNC = 1 << 4, /* RWF-- */
|
||||
ZIO_STAGE_WRITE_COMPRESS = 1 << 5, /* -W--- */
|
||||
|
||||
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 5, /* -W--- */
|
||||
ZIO_STAGE_CHECKSUM_GENERATE = 1 << 6, /* -W--- */
|
||||
|
||||
ZIO_STAGE_NOP_WRITE = 1 << 6, /* -W--- */
|
||||
ZIO_STAGE_NOP_WRITE = 1 << 7, /* -W--- */
|
||||
|
||||
ZIO_STAGE_DDT_READ_START = 1 << 7, /* R---- */
|
||||
ZIO_STAGE_DDT_READ_DONE = 1 << 8, /* R---- */
|
||||
ZIO_STAGE_DDT_WRITE = 1 << 9, /* -W--- */
|
||||
ZIO_STAGE_DDT_FREE = 1 << 10, /* --F-- */
|
||||
ZIO_STAGE_DDT_READ_START = 1 << 8, /* R---- */
|
||||
ZIO_STAGE_DDT_READ_DONE = 1 << 9, /* R---- */
|
||||
ZIO_STAGE_DDT_WRITE = 1 << 10, /* -W--- */
|
||||
ZIO_STAGE_DDT_FREE = 1 << 11, /* --F-- */
|
||||
|
||||
ZIO_STAGE_GANG_ASSEMBLE = 1 << 11, /* RWFC- */
|
||||
ZIO_STAGE_GANG_ISSUE = 1 << 12, /* RWFC- */
|
||||
ZIO_STAGE_GANG_ASSEMBLE = 1 << 12, /* RWFC- */
|
||||
ZIO_STAGE_GANG_ISSUE = 1 << 13, /* RWFC- */
|
||||
|
||||
ZIO_STAGE_DVA_ALLOCATE = 1 << 13, /* -W--- */
|
||||
ZIO_STAGE_DVA_FREE = 1 << 14, /* --F-- */
|
||||
ZIO_STAGE_DVA_CLAIM = 1 << 15, /* ---C- */
|
||||
ZIO_STAGE_DVA_THROTTLE = 1 << 14, /* -W--- */
|
||||
ZIO_STAGE_DVA_ALLOCATE = 1 << 15, /* -W--- */
|
||||
ZIO_STAGE_DVA_FREE = 1 << 16, /* --F-- */
|
||||
ZIO_STAGE_DVA_CLAIM = 1 << 17, /* ---C- */
|
||||
|
||||
ZIO_STAGE_READY = 1 << 16, /* RWFCI */
|
||||
ZIO_STAGE_READY = 1 << 18, /* RWFCI */
|
||||
|
||||
ZIO_STAGE_VDEV_IO_START = 1 << 17, /* RWF-I */
|
||||
ZIO_STAGE_VDEV_IO_DONE = 1 << 18, /* RWF-- */
|
||||
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 19, /* RWF-I */
|
||||
ZIO_STAGE_VDEV_IO_START = 1 << 19, /* RWF-I */
|
||||
ZIO_STAGE_VDEV_IO_DONE = 1 << 20, /* RWF-I */
|
||||
ZIO_STAGE_VDEV_IO_ASSESS = 1 << 21, /* RWF-I */
|
||||
|
||||
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 20, /* R---- */
|
||||
ZIO_STAGE_CHECKSUM_VERIFY = 1 << 22, /* R---- */
|
||||
|
||||
ZIO_STAGE_DONE = 1 << 21 /* RWFCI */
|
||||
ZIO_STAGE_DONE = 1 << 23 /* RWFCI */
|
||||
};
|
||||
|
||||
#define ZIO_INTERLOCK_STAGES \
|
||||
@ -187,22 +189,27 @@ enum zio_stage {
|
||||
|
||||
#define ZIO_REWRITE_PIPELINE \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
ZIO_STAGE_WRITE_COMPRESS | \
|
||||
ZIO_STAGE_WRITE_BP_INIT)
|
||||
|
||||
#define ZIO_WRITE_PIPELINE \
|
||||
(ZIO_WRITE_COMMON_STAGES | \
|
||||
ZIO_STAGE_WRITE_BP_INIT | \
|
||||
ZIO_STAGE_WRITE_COMPRESS | \
|
||||
ZIO_STAGE_DVA_THROTTLE | \
|
||||
ZIO_STAGE_DVA_ALLOCATE)
|
||||
|
||||
#define ZIO_DDT_CHILD_WRITE_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_VDEV_IO_STAGES | \
|
||||
ZIO_STAGE_DVA_THROTTLE | \
|
||||
ZIO_STAGE_DVA_ALLOCATE)
|
||||
|
||||
#define ZIO_DDT_WRITE_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_STAGE_ISSUE_ASYNC | \
|
||||
ZIO_STAGE_WRITE_BP_INIT | \
|
||||
ZIO_STAGE_ISSUE_ASYNC | \
|
||||
ZIO_STAGE_WRITE_COMPRESS | \
|
||||
ZIO_STAGE_CHECKSUM_GENERATE | \
|
||||
ZIO_STAGE_DDT_WRITE)
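/*
 * Illustrative note (not part of the diff): in the renumbered stage enum
 * above, ZIO_STAGE_DVA_THROTTLE (1 << 14) is ordered immediately before
 * ZIO_STAGE_DVA_ALLOCATE (1 << 15), so throttling runs just before
 * allocation in the write pipelines that include it.  Membership of a
 * stage in a pipeline is a plain bit test; assuming the usual io_pipeline
 * field of struct zio, a check would look like:
 */
static boolean_t
example_pipeline_is_throttled(const zio_t *zio)
{
	return ((zio->io_pipeline & ZIO_STAGE_DVA_THROTTLE) != 0);
}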
|
||||
|
||||
|
@ -441,6 +441,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
||||
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&vd->vdev_queue_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
for (int t = 0; t < DTL_TYPES; t++) {
|
||||
vd->vdev_dtl[t] = range_tree_create(NULL, NULL,
|
||||
&vd->vdev_dtl_lock);
|
||||
@ -770,6 +771,7 @@ vdev_free(vdev_t *vd)
|
||||
}
|
||||
mutex_exit(&vd->vdev_dtl_lock);
|
||||
|
||||
mutex_destroy(&vd->vdev_queue_lock);
|
||||
mutex_destroy(&vd->vdev_dtl_lock);
|
||||
mutex_destroy(&vd->vdev_stat_lock);
|
||||
mutex_destroy(&vd->vdev_probe_lock);
|
||||
@ -1086,7 +1088,8 @@ vdev_probe_done(zio_t *zio)
|
||||
vd->vdev_probe_zio = NULL;
|
||||
mutex_exit(&vd->vdev_probe_lock);
|
||||
|
||||
while ((pio = zio_walk_parents(zio)) != NULL)
|
||||
zio_link_t *zl = NULL;
|
||||
while ((pio = zio_walk_parents(zio, &zl)) != NULL)
|
||||
if (!vdev_accessible(vd, pio))
|
||||
pio->io_error = SET_ERROR(ENXIO);
|
||||
|
||||
@ -2857,7 +2860,8 @@ vdev_allocatable(vdev_t *vd)
|
||||
* we're asking two separate questions about it.
|
||||
*/
|
||||
return (!(state < VDEV_STATE_DEGRADED && state != VDEV_STATE_CLOSED) &&
|
||||
!vd->vdev_cant_write && !vd->vdev_ishole);
|
||||
!vd->vdev_cant_write && !vd->vdev_ishole &&
|
||||
vd->vdev_mg->mg_initialized);
|
||||
}
|
||||
|
||||
boolean_t
|
||||
@ -2885,6 +2889,7 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
vdev_t *tvd = vd->vdev_top;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_ALL, RW_READER) != 0);
|
||||
|
||||
@ -2895,8 +2900,15 @@ vdev_get_stats(vdev_t *vd, vdev_stat_t *vs)
|
||||
vs->vs_rsize = vdev_get_min_asize(vd);
|
||||
if (vd->vdev_ops->vdev_op_leaf)
|
||||
vs->vs_rsize += VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE;
|
||||
if (vd->vdev_max_asize != 0)
|
||||
vs->vs_esize = vd->vdev_max_asize - vd->vdev_asize;
|
||||
/*
|
||||
 * Report expandable space on top-level, non-auxiliary devices only.
 * The expandable space is reported in terms of metaslab sized units
 * since that determines how much space the pool can expand.
 */
|
||||
if (vd->vdev_aux == NULL && tvd != NULL && vd->vdev_max_asize != 0) {
|
||||
vs->vs_esize = P2ALIGN(vd->vdev_max_asize - vd->vdev_asize,
|
||||
1ULL << tvd->vdev_ms_shift);
|
||||
}
|
||||
vs->vs_configured_ashift = vd->vdev_top != NULL
|
||||
? vd->vdev_top->vdev_ashift : vd->vdev_ashift;
|
||||
vs->vs_logical_ashift = vd->vdev_logical_ashift;
|
||||
|
@ -23,7 +23,7 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2013 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -247,7 +247,8 @@ vdev_cache_fill(zio_t *fio)
|
||||
* any reads that were queued up before the missed update are still
|
||||
* valid, so we can satisfy them from this line before we evict it.
|
||||
*/
|
||||
while ((pio = zio_walk_parents(fio)) != NULL)
|
||||
zio_link_t *zl = NULL;
|
||||
while ((pio = zio_walk_parents(fio, &zl)) != NULL)
|
||||
vdev_cache_hit(vc, ve, pio);
|
||||
|
||||
if (fio->io_error || ve->ve_missed_update)
|
||||
|
@ -241,34 +241,6 @@ vdev_disk_rele(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t
|
||||
vdev_disk_get_space(vdev_t *vd, uint64_t capacity, uint_t blksz)
|
||||
{
|
||||
ASSERT(vd->vdev_wholedisk);
|
||||
|
||||
vdev_disk_t *dvd = vd->vdev_tsd;
|
||||
dk_efi_t dk_ioc;
|
||||
efi_gpt_t *efi;
|
||||
uint64_t avail_space = 0;
|
||||
int efisize = EFI_LABEL_SIZE * 2;
|
||||
|
||||
dk_ioc.dki_data = kmem_alloc(efisize, KM_SLEEP);
|
||||
dk_ioc.dki_lba = 1;
|
||||
dk_ioc.dki_length = efisize;
|
||||
dk_ioc.dki_data_64 = (uint64_t)(uintptr_t)dk_ioc.dki_data;
|
||||
efi = dk_ioc.dki_data;
|
||||
|
||||
if (ldi_ioctl(dvd->vd_lh, DKIOCGETEFI, (intptr_t)&dk_ioc,
|
||||
FKIOCTL, kcred, NULL) == 0) {
|
||||
uint64_t efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA);
|
||||
|
||||
if (capacity > efi_altern_lba)
|
||||
avail_space = (capacity - efi_altern_lba) * blksz;
|
||||
}
|
||||
kmem_free(dk_ioc.dki_data, efisize);
|
||||
return (avail_space);
|
||||
}
|
||||
|
||||
/*
|
||||
* We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when
|
||||
* even a fallback to DKIOCGMEDIAINFO fails.
|
||||
@ -559,10 +531,7 @@ vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
|
||||
* Adjust max_psize upward accordingly since we know
|
||||
* we own the whole disk now.
|
||||
*/
|
||||
*max_psize += vdev_disk_get_space(vd, capacity, blksz);
|
||||
zfs_dbgmsg("capacity change: vdev %s, psize %llu, "
|
||||
"max_psize %llu", vd->vdev_path, *psize,
|
||||
*max_psize);
|
||||
*max_psize = capacity * blksz;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -24,7 +24,7 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
@ -293,9 +293,10 @@ vdev_mirror_scrub_done(zio_t *zio)
|
||||
|
||||
if (zio->io_error == 0) {
|
||||
zio_t *pio;
|
||||
zio_link_t *zl = NULL;
|
||||
|
||||
mutex_enter(&zio->io_lock);
|
||||
while ((pio = zio_walk_parents(zio)) != NULL) {
|
||||
while ((pio = zio_walk_parents(zio, &zl)) != NULL) {
|
||||
mutex_enter(&pio->io_lock);
|
||||
ASSERT3U(zio->io_size, >=, pio->io_size);
|
||||
bcopy(zio->io_data, pio->io_data, pio->io_size);
|
||||
|
@ -34,6 +34,7 @@
|
||||
#include <sys/zio.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/dsl_pool.h>
|
||||
#include <sys/metaslab_impl.h>
|
||||
|
||||
/*
|
||||
* ZFS I/O Scheduler
|
||||
@ -175,6 +176,23 @@ int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE;
|
||||
int zfs_vdev_read_gap_limit = 32 << 10;
|
||||
int zfs_vdev_write_gap_limit = 4 << 10;
|
||||
|
||||
/*
|
||||
 * Define the queue depth percentage for each top-level vdev. This
 * percentage is used in conjunction with zfs_vdev_async_write_max_active
 * to determine how many allocations a specific top-level vdev should
 * handle. Once the queue depth reaches
 * zfs_vdev_queue_depth_pct * zfs_vdev_async_write_max_active / 100 the
 * allocator will stop allocating blocks on that top-level device.
 * The default kernel setting is 1000% which will yield 100 allocations per
 * device. For userland testing, the default setting is 300% which equates
 * to 30 allocations per device.
 */
|
||||
#ifdef _KERNEL
|
||||
int zfs_vdev_queue_depth_pct = 1000;
|
||||
#else
|
||||
int zfs_vdev_queue_depth_pct = 300;
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#ifdef _KERNEL
|
||||
SYSCTL_DECL(_vfs_zfs_vdev);
|
||||
@ -233,6 +251,9 @@ SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, read_gap_limit, CTLFLAG_RWTUN,
|
||||
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, write_gap_limit, CTLFLAG_RWTUN,
|
||||
&zfs_vdev_write_gap_limit, 0,
|
||||
"Acceptable gap between two writes being aggregated");
|
||||
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, queue_depth_pct, CTLFLAG_RWTUN,
|
||||
&zfs_vdev_queue_depth_pct, 0,
|
||||
"Queue depth percentage for each top-level");
|
||||
|
||||
static int
|
||||
sysctl_zfs_async_write_active_min_dirty_percent(SYSCTL_HANDLER_ARGS)
|
||||
@ -390,6 +411,7 @@ vdev_queue_io_add(vdev_queue_t *vq, zio_t *zio)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
avl_tree_t *qtt;
|
||||
|
||||
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
||||
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
||||
avl_add(vdev_queue_class_tree(vq, zio->io_priority), zio);
|
||||
@ -411,6 +433,7 @@ vdev_queue_io_remove(vdev_queue_t *vq, zio_t *zio)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
avl_tree_t *qtt;
|
||||
|
||||
ASSERT(MUTEX_HELD(&vq->vq_lock));
|
||||
ASSERT3U(zio->io_priority, <, ZIO_PRIORITY_NUM_QUEUEABLE);
|
||||
avl_remove(vdev_queue_class_tree(vq, zio->io_priority), zio);
|
||||
@ -480,7 +503,8 @@ vdev_queue_agg_io_done(zio_t *aio)
|
||||
{
|
||||
if (aio->io_type == ZIO_TYPE_READ) {
|
||||
zio_t *pio;
|
||||
while ((pio = zio_walk_parents(aio)) != NULL) {
|
||||
zio_link_t *zl = NULL;
|
||||
while ((pio = zio_walk_parents(aio, &zl)) != NULL) {
|
||||
bcopy((char *)aio->io_data + (pio->io_offset -
|
||||
aio->io_offset), pio->io_data, pio->io_size);
|
||||
}
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
*/
|
||||
|
||||
@ -270,6 +270,7 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
|
||||
uint64_t blk, off;
|
||||
int err;
|
||||
dmu_buf_t *db;
|
||||
dnode_t *dn;
|
||||
int bs = FZAP_BLOCK_SHIFT(zap);
|
||||
|
||||
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
|
||||
@ -277,8 +278,15 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
|
||||
blk = idx >> (bs-3);
|
||||
off = idx & ((1<<(bs-3))-1);
|
||||
|
||||
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
/*
|
||||
* Note: this is equivalent to dmu_buf_hold(), but we use
|
||||
* _dnode_enter / _by_dnode because it's faster because we don't
|
||||
* have to hold the dnode.
|
||||
*/
|
||||
dn = dmu_buf_dnode_enter(zap->zap_dbuf);
|
||||
err = dmu_buf_hold_by_dnode(dn,
|
||||
(tbl->zt_blk + blk) << bs, FTAG, &db, DMU_READ_NO_PREFETCH);
|
||||
dmu_buf_dnode_exit(zap->zap_dbuf);
|
||||
if (err)
|
||||
return (err);
|
||||
*valp = ((uint64_t *)db->db_data)[off];
|
||||
@ -292,9 +300,11 @@ zap_table_load(zap_t *zap, zap_table_phys_t *tbl, uint64_t idx, uint64_t *valp)
|
||||
*/
|
||||
blk = (idx*2) >> (bs-3);
|
||||
|
||||
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
dn = dmu_buf_dnode_enter(zap->zap_dbuf);
|
||||
err = dmu_buf_hold_by_dnode(dn,
|
||||
(tbl->zt_nextblk + blk) << bs, FTAG, &db,
|
||||
DMU_READ_NO_PREFETCH);
|
||||
dmu_buf_dnode_exit(zap->zap_dbuf);
|
||||
if (err == 0)
|
||||
dmu_buf_rele(db, FTAG);
|
||||
}
|
||||
@ -505,8 +515,10 @@ zap_get_leaf_byblk(zap_t *zap, uint64_t blkid, dmu_tx_t *tx, krw_t lt,
|
||||
|
||||
ASSERT(RW_LOCK_HELD(&zap->zap_rwlock));
|
||||
|
||||
err = dmu_buf_hold(zap->zap_objset, zap->zap_object,
|
||||
dnode_t *dn = dmu_buf_dnode_enter(zap->zap_dbuf);
|
||||
err = dmu_buf_hold_by_dnode(dn,
|
||||
blkid << bs, NULL, &db, DMU_READ_NO_PREFETCH);
|
||||
dmu_buf_dnode_exit(zap->zap_dbuf);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
@ -596,7 +608,8 @@ zap_deref_leaf(zap_t *zap, uint64_t h, dmu_tx_t *tx, krw_t lt, zap_leaf_t **lp)
|
||||
}
|
||||
|
||||
static int
|
||||
zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
|
||||
zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l,
|
||||
void *tag, dmu_tx_t *tx, zap_leaf_t **lp)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
uint64_t hash = zn->zn_hash;
|
||||
@ -618,9 +631,9 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
|
||||
uint64_t object = zap->zap_object;
|
||||
|
||||
zap_put_leaf(l);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, tag);
|
||||
err = zap_lockdir(os, object, tx, RW_WRITER,
|
||||
FALSE, FALSE, &zn->zn_zap);
|
||||
FALSE, FALSE, tag, &zn->zn_zap);
|
||||
zap = zn->zn_zap;
|
||||
if (err)
|
||||
return (err);
|
||||
@ -683,7 +696,8 @@ zap_expand_leaf(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx, zap_leaf_t **lp)
|
||||
}
|
||||
|
||||
static void
|
||||
zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
|
||||
zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l,
|
||||
void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
zap_t *zap = zn->zn_zap;
|
||||
int shift = zap_f_phys(zap)->zap_ptrtbl.zt_shift;
|
||||
@ -703,9 +717,9 @@ zap_put_leaf_maybe_grow_ptrtbl(zap_name_t *zn, zap_leaf_t *l, dmu_tx_t *tx)
|
||||
objset_t *os = zap->zap_objset;
|
||||
uint64_t zapobj = zap->zap_object;
|
||||
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, tag);
|
||||
err = zap_lockdir(os, zapobj, tx,
|
||||
RW_WRITER, FALSE, FALSE, &zn->zn_zap);
|
||||
RW_WRITER, FALSE, FALSE, tag, &zn->zn_zap);
|
||||
zap = zn->zn_zap;
|
||||
if (err)
|
||||
return;
|
||||
@ -795,7 +809,7 @@ fzap_lookup(zap_name_t *zn,
|
||||
int
|
||||
fzap_add_cd(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, uint32_t cd, dmu_tx_t *tx)
|
||||
const void *val, uint32_t cd, void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
zap_leaf_t *l;
|
||||
int err;
|
||||
@ -824,7 +838,7 @@ fzap_add_cd(zap_name_t *zn,
|
||||
if (err == 0) {
|
||||
zap_increment_num_entries(zap, 1, tx);
|
||||
} else if (err == EAGAIN) {
|
||||
err = zap_expand_leaf(zn, l, tx, &l);
|
||||
err = zap_expand_leaf(zn, l, tag, tx, &l);
|
||||
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
|
||||
if (err == 0)
|
||||
goto retry;
|
||||
@ -832,26 +846,27 @@ fzap_add_cd(zap_name_t *zn,
|
||||
|
||||
out:
|
||||
if (zap != NULL)
|
||||
zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
|
||||
zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
fzap_add(zap_name_t *zn,
|
||||
uint64_t integer_size, uint64_t num_integers,
|
||||
const void *val, dmu_tx_t *tx)
|
||||
const void *val, void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
int err = fzap_check(zn, integer_size, num_integers);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
return (fzap_add_cd(zn, integer_size, num_integers,
|
||||
val, ZAP_NEED_CD, tx));
|
||||
val, ZAP_NEED_CD, tag, tx));
|
||||
}
|
||||
|
||||
int
|
||||
fzap_update(zap_name_t *zn,
|
||||
int integer_size, uint64_t num_integers, const void *val, dmu_tx_t *tx)
|
||||
int integer_size, uint64_t num_integers, const void *val,
|
||||
void *tag, dmu_tx_t *tx)
|
||||
{
|
||||
zap_leaf_t *l;
|
||||
int err, create;
|
||||
@ -881,14 +896,14 @@ fzap_update(zap_name_t *zn,
|
||||
}
|
||||
|
||||
if (err == EAGAIN) {
|
||||
err = zap_expand_leaf(zn, l, tx, &l);
|
||||
err = zap_expand_leaf(zn, l, tag, tx, &l);
|
||||
zap = zn->zn_zap; /* zap_expand_leaf() may change zap */
|
||||
if (err == 0)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (zap != NULL)
|
||||
zap_put_leaf_maybe_grow_ptrtbl(zn, l, tx);
|
||||
zap_put_leaf_maybe_grow_ptrtbl(zn, l, tag, tx);
|
||||
return (err);
|
||||
}
|
||||
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright (c) 2014 Integros [integros.com]
|
||||
*/
|
||||
@ -43,7 +43,8 @@
|
||||
|
||||
extern inline mzap_phys_t *zap_m_phys(zap_t *zap);
|
||||
|
||||
static int mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags);
|
||||
static int mzap_upgrade(zap_t **zapp,
|
||||
void *tag, dmu_tx_t *tx, zap_flags_t flags);
|
||||
|
||||
uint64_t
|
||||
zap_getflags(zap_t *zap)
|
||||
@ -468,21 +469,19 @@ mzap_open(objset_t *os, uint64_t obj, dmu_buf_t *db)
|
||||
return (winner);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
static int
|
||||
zap_lockdir_impl(dmu_buf_t *db, void *tag, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, zap_t **zapp)
|
||||
{
|
||||
zap_t *zap;
|
||||
dmu_buf_t *db;
|
||||
krw_t lt;
|
||||
int err;
|
||||
|
||||
ASSERT0(db->db_offset);
|
||||
objset_t *os = dmu_buf_get_objset(db);
|
||||
uint64_t obj = db->db_object;
|
||||
|
||||
*zapp = NULL;
|
||||
|
||||
err = dmu_buf_hold(os, obj, 0, NULL, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
#ifdef ZFS_DEBUG
|
||||
{
|
||||
dmu_object_info_t doi;
|
||||
@ -499,7 +498,6 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
* mzap_open() didn't like what it saw on-disk.
|
||||
* Check for corruption!
|
||||
*/
|
||||
dmu_buf_rele(db, NULL);
|
||||
return (SET_ERROR(EIO));
|
||||
}
|
||||
}
|
||||
@ -538,10 +536,12 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
dprintf("upgrading obj %llu: num_entries=%u\n",
|
||||
obj, zap->zap_m.zap_num_entries);
|
||||
*zapp = zap;
|
||||
return (mzap_upgrade(zapp, tx, 0));
|
||||
int err = mzap_upgrade(zapp, tag, tx, 0);
|
||||
if (err != 0)
|
||||
rw_exit(&zap->zap_rwlock);
|
||||
return (err);
|
||||
}
|
||||
err = dmu_object_set_blocksize(os, obj, newsz, 0, tx);
|
||||
ASSERT0(err);
|
||||
VERIFY0(dmu_object_set_blocksize(os, obj, newsz, 0, tx));
|
||||
zap->zap_m.zap_num_chunks =
|
||||
db->db_size / MZAP_ENT_LEN - 1;
|
||||
}
|
||||
@ -550,15 +550,49 @@ zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
zap_lockdir_by_dnode(dnode_t *dn, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
err = dmu_buf_hold_by_dnode(dn, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0) {
|
||||
return (err);
|
||||
}
|
||||
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0) {
|
||||
dmu_buf_rele(db, tag);
|
||||
}
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lockdir(objset_t *os, uint64_t obj, dmu_tx_t *tx,
|
||||
krw_t lti, boolean_t fatreader, boolean_t adding, void *tag, zap_t **zapp)
|
||||
{
|
||||
dmu_buf_t *db;
|
||||
int err;
|
||||
|
||||
err = dmu_buf_hold(os, obj, 0, tag, &db, DMU_READ_NO_PREFETCH);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lockdir_impl(db, tag, tx, lti, fatreader, adding, zapp);
|
||||
if (err != 0)
|
||||
dmu_buf_rele(db, tag);
|
||||
return (err);
|
||||
}
|
||||
|
||||
void
|
||||
zap_unlockdir(zap_t *zap)
|
||||
zap_unlockdir(zap_t *zap, void *tag)
|
||||
{
|
||||
rw_exit(&zap->zap_rwlock);
|
||||
dmu_buf_rele(zap->zap_dbuf, NULL);
|
||||
dmu_buf_rele(zap->zap_dbuf, tag);
|
||||
}
|
||||
|
||||
static int
|
||||
mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
|
||||
mzap_upgrade(zap_t **zapp, void *tag, dmu_tx_t *tx, zap_flags_t flags)
|
||||
{
|
||||
mzap_phys_t *mzp;
|
||||
int i, sz, nchunks;
|
||||
@ -596,7 +630,8 @@ mzap_upgrade(zap_t **zapp, dmu_tx_t *tx, zap_flags_t flags)
|
||||
dprintf("adding %s=%llu\n",
|
||||
mze->mze_name, mze->mze_value);
|
||||
zn = zap_name_alloc(zap, mze->mze_name, MT_EXACT);
|
||||
err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd, tx);
|
||||
err = fzap_add_cd(zn, 8, 1, &mze->mze_value, mze->mze_cd,
|
||||
tag, tx);
|
||||
zap = zn->zn_zap; /* fzap_add_cd() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (err)
|
||||
@ -635,9 +670,9 @@ mzap_create_impl(objset_t *os, uint64_t obj, int normflags, zap_flags_t flags,
|
||||
zap_t *zap;
|
||||
/* Only fat zap supports flags; upgrade immediately. */
|
||||
VERIFY(0 == zap_lockdir(os, obj, tx, RW_WRITER,
|
||||
B_FALSE, B_FALSE, &zap));
|
||||
VERIFY3U(0, ==, mzap_upgrade(&zap, tx, flags));
|
||||
zap_unlockdir(zap);
|
||||
B_FALSE, B_FALSE, FTAG, &zap));
|
||||
VERIFY3U(0, ==, mzap_upgrade(&zap, FTAG, tx, flags));
|
||||
zap_unlockdir(zap, FTAG);
|
||||
}
|
||||
}
|
||||
|
||||
@ -732,7 +767,7 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
|
||||
zap_t *zap;
|
||||
int err;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
if (!zap->zap_ismicro) {
|
||||
@ -740,7 +775,7 @@ zap_count(objset_t *os, uint64_t zapobj, uint64_t *count)
|
||||
} else {
|
||||
*count = zap->zap_m.zap_num_entries;
|
||||
}
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -797,25 +832,19 @@ zap_lookup(objset_t *os, uint64_t zapobj, const char *name,
|
||||
num_integers, buf, MT_EXACT, NULL, 0, NULL));
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
static int
|
||||
zap_lookup_impl(zap_t *zap, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp)
|
||||
{
|
||||
zap_t *zap;
|
||||
int err;
|
||||
int err = 0;
|
||||
mzap_ent_t *mze;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc(zap, name, mt);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
if (zn == NULL)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
if (!zap->zap_ismicro) {
|
||||
err = fzap_lookup(zn, integer_size, num_integers, buf,
|
||||
@ -842,7 +871,51 @@ zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
}
|
||||
}
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp)
|
||||
{
|
||||
zap_t *zap;
|
||||
int err;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_impl(zap, name, integer_size,
|
||||
num_integers, buf, mt, realname, rn_len, ncp);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf)
|
||||
{
|
||||
return (zap_lookup_norm_by_dnode(dn, name, integer_size,
|
||||
num_integers, buf, MT_EXACT, NULL, 0, NULL));
|
||||
}
|
||||
|
||||
int
|
||||
zap_lookup_norm_by_dnode(dnode_t *dn, const char *name,
|
||||
uint64_t integer_size, uint64_t num_integers, void *buf,
|
||||
matchtype_t mt, char *realname, int rn_len,
|
||||
boolean_t *ncp)
|
||||
{
|
||||
zap_t *zap;
|
||||
int err;
|
||||
|
||||
err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
err = zap_lookup_impl(zap, name, integer_size,
|
||||
num_integers, buf, mt, realname, rn_len, ncp);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -854,18 +927,18 @@ zap_prefetch_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int err;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
fzap_prefetch(zn);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -877,19 +950,19 @@ zap_lookup_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int err;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
|
||||
err = fzap_lookup(zn, integer_size, num_integers, buf,
|
||||
NULL, 0, NULL);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -912,12 +985,12 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
|
||||
mzap_ent_t *mze;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc(zap, name, MT_EXACT);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
if (!zap->zap_ismicro) {
|
||||
@ -934,7 +1007,7 @@ zap_length(objset_t *os, uint64_t zapobj, const char *name,
|
||||
}
|
||||
}
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -946,17 +1019,17 @@ zap_length_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int err;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_length(zn, integer_size, num_integers);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1015,22 +1088,24 @@ zap_add(objset_t *os, uint64_t zapobj, const char *key,
|
||||
const uint64_t *intval = val;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc(zap, key, MT_EXACT);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
if (!zap->zap_ismicro) {
|
||||
err = fzap_add(zn, integer_size, num_integers, val, tx);
|
||||
err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
} else if (integer_size != 8 || num_integers != 1 ||
|
||||
strlen(key) >= MZAP_NAME_LEN) {
|
||||
err = mzap_upgrade(&zn->zn_zap, tx, 0);
|
||||
if (err == 0)
|
||||
err = fzap_add(zn, integer_size, num_integers, val, tx);
|
||||
err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
|
||||
if (err == 0) {
|
||||
err = fzap_add(zn, integer_size, num_integers, val,
|
||||
FTAG, tx);
|
||||
}
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
} else {
|
||||
mze = mze_find(zn);
|
||||
@ -1043,7 +1118,7 @@ zap_add(objset_t *os, uint64_t zapobj, const char *key,
|
||||
ASSERT(zap == zn->zn_zap);
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_add() failed */
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1056,19 +1131,19 @@ zap_add_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int err;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_add(zn, integer_size, num_integers, val, tx);
|
||||
err = fzap_add(zn, integer_size, num_integers, val, FTAG, tx);
|
||||
zap = zn->zn_zap; /* fzap_add() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_add() failed */
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1092,25 +1167,27 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
|
||||
(void) zap_lookup(os, zapobj, name, 8, 1, &oldval);
|
||||
#endif
|
||||
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc(zap, name, MT_EXACT);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
if (!zap->zap_ismicro) {
|
||||
err = fzap_update(zn, integer_size, num_integers, val, tx);
|
||||
err = fzap_update(zn, integer_size, num_integers, val,
|
||||
FTAG, tx);
|
||||
zap = zn->zn_zap; /* fzap_update() may change zap */
|
||||
} else if (integer_size != 8 || num_integers != 1 ||
|
||||
strlen(name) >= MZAP_NAME_LEN) {
|
||||
dprintf("upgrading obj %llu: intsz=%u numint=%llu name=%s\n",
|
||||
zapobj, integer_size, num_integers, name);
|
||||
err = mzap_upgrade(&zn->zn_zap, tx, 0);
|
||||
if (err == 0)
|
||||
err = mzap_upgrade(&zn->zn_zap, FTAG, tx, 0);
|
||||
if (err == 0) {
|
||||
err = fzap_update(zn, integer_size, num_integers,
|
||||
val, tx);
|
||||
val, FTAG, tx);
|
||||
}
|
||||
zap = zn->zn_zap; /* fzap_update() may change zap */
|
||||
} else {
|
||||
mze = mze_find(zn);
|
||||
@ -1124,7 +1201,7 @@ zap_update(objset_t *os, uint64_t zapobj, const char *name,
|
||||
ASSERT(zap == zn->zn_zap);
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1137,19 +1214,19 @@ zap_update_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
zap_name_t *zn;
|
||||
int err;
|
||||
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, &zap);
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, TRUE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_update(zn, integer_size, num_integers, val, tx);
|
||||
err = fzap_update(zn, integer_size, num_integers, val, FTAG, tx);
|
||||
zap = zn->zn_zap; /* fzap_update() may change zap */
|
||||
zap_name_free(zn);
|
||||
if (zap != NULL) /* may be NULL if fzap_upgrade() failed */
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1168,12 +1245,12 @@ zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
mzap_ent_t *mze;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc(zap, name, mt);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
if (!zap->zap_ismicro) {
|
||||
@ -1190,7 +1267,7 @@ zap_remove_norm(objset_t *os, uint64_t zapobj, const char *name,
|
||||
}
|
||||
}
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1202,17 +1279,17 @@ zap_remove_uint64(objset_t *os, uint64_t zapobj, const uint64_t *key,
|
||||
int err;
|
||||
zap_name_t *zn;
|
||||
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, tx, RW_WRITER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
zn = zap_name_alloc_uint64(zap, key, key_numints);
|
||||
if (zn == NULL) {
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
err = fzap_remove(zn, tx);
|
||||
zap_name_free(zn);
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1244,7 +1321,7 @@ zap_cursor_fini(zap_cursor_t *zc)
|
||||
{
|
||||
if (zc->zc_zap) {
|
||||
rw_enter(&zc->zc_zap->zap_rwlock, RW_READER);
|
||||
zap_unlockdir(zc->zc_zap);
|
||||
zap_unlockdir(zc->zc_zap, NULL);
|
||||
zc->zc_zap = NULL;
|
||||
}
|
||||
if (zc->zc_leaf) {
|
||||
@ -1291,7 +1368,7 @@ zap_cursor_retrieve(zap_cursor_t *zc, zap_attribute_t *za)
|
||||
if (zc->zc_zap == NULL) {
|
||||
int hb;
|
||||
err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
|
||||
RW_READER, TRUE, FALSE, &zc->zc_zap);
|
||||
RW_READER, TRUE, FALSE, NULL, &zc->zc_zap);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
@ -1358,7 +1435,7 @@ zap_cursor_move_to_key(zap_cursor_t *zc, const char *name, matchtype_t mt)
|
||||
|
||||
if (zc->zc_zap == NULL) {
|
||||
err = zap_lockdir(zc->zc_objset, zc->zc_zapobj, NULL,
|
||||
RW_READER, TRUE, FALSE, &zc->zc_zap);
|
||||
RW_READER, TRUE, FALSE, FTAG, &zc->zc_zap);
|
||||
if (err)
|
||||
return (err);
|
||||
} else {
|
||||
@ -1395,7 +1472,7 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
|
||||
int err;
|
||||
zap_t *zap;
|
||||
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, FTAG, &zap);
|
||||
if (err)
|
||||
return (err);
|
||||
|
||||
@ -1408,12 +1485,12 @@ zap_get_stats(objset_t *os, uint64_t zapobj, zap_stats_t *zs)
|
||||
} else {
|
||||
fzap_get_stats(zap, zs);
|
||||
}
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
zap_count_write_by_dnode(dnode_t *dn, const char *name, int add,
|
||||
refcount_t *towrite, refcount_t *tooverwrite)
|
||||
{
|
||||
zap_t *zap;
|
||||
@ -1427,7 +1504,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
* - 2 blocks for possibly split leaves,
|
||||
* - 2 grown ptrtbl blocks
|
||||
*
|
||||
* This also accomodates the case where an add operation to a fairly
|
||||
* This also accommodates the case where an add operation to a fairly
|
||||
* large microzap results in a promotion to fatzap.
|
||||
*/
|
||||
if (name == NULL) {
|
||||
@ -1440,10 +1517,11 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
* We lock the zap with adding == FALSE. Because, if we pass
|
||||
* the actual value of add, it could trigger a mzap_upgrade().
|
||||
* At present we are just evaluating the possibility of this operation
|
||||
* and hence we donot want to trigger an upgrade.
|
||||
* and hence we do not want to trigger an upgrade.
|
||||
*/
|
||||
err = zap_lockdir(os, zapobj, NULL, RW_READER, TRUE, FALSE, &zap);
|
||||
if (err)
|
||||
err = zap_lockdir_by_dnode(dn, NULL, RW_READER, TRUE, FALSE,
|
||||
FTAG, &zap);
|
||||
if (err != 0)
|
||||
return (err);
|
||||
|
||||
if (!zap->zap_ismicro) {
|
||||
@ -1489,6 +1567,6 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
}
|
||||
}
|
||||
|
||||
zap_unlockdir(zap);
|
||||
zap_unlockdir(zap, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
@ -20,6 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
*/
@ -253,7 +254,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
}
}

VERIFY(arc_buf_remove_ref(abuf, &abuf));
arc_buf_destroy(abuf, &abuf);
}

return (error);
@ -290,7 +291,7 @@ zil_read_log_data(zilog_t *zilog, const lr_write_t *lr, void *wbuf)
if (error == 0) {
if (wbuf != NULL)
bcopy(abuf->b_data, wbuf, arc_buf_size(abuf));
(void) arc_buf_remove_ref(abuf, &abuf);
arc_buf_destroy(abuf, &abuf);
}

return (error);
|
||||
|
@ -41,6 +41,7 @@
|
||||
#include <sys/trim_map.h>
|
||||
#include <sys/blkptr.h>
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/metaslab_impl.h>
|
||||
|
||||
SYSCTL_DECL(_vfs_zfs);
|
||||
SYSCTL_NODE(_vfs_zfs, OID_AUTO, zio, CTLFLAG_RW, 0, "ZFS ZIO");
|
||||
@ -78,6 +79,10 @@ const char *zio_type_name[ZIO_TYPES] = {
|
||||
"zio_ioctl"
|
||||
};
|
||||
|
||||
boolean_t zio_dva_throttle_enabled = B_TRUE;
|
||||
SYSCTL_INT(_vfs_zfs_zio, OID_AUTO, dva_throttle_enabled, CTLFLAG_RDTUN,
|
||||
&zio_dva_throttle_enabled, 0, "");
|
||||
|
||||
/*
|
||||
* ==========================================================================
|
||||
* I/O kmem caches
|
||||
@ -136,6 +141,8 @@ int zio_buf_debug_limit = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
static void zio_taskq_dispatch(zio_t *, zio_taskq_type_t, boolean_t);
|
||||
|
||||
void
|
||||
zio_init(void)
|
||||
{
|
||||
@ -329,7 +336,7 @@ zio_data_buf_free(void *buf, size_t size)
|
||||
* Push and pop I/O transform buffers
|
||||
* ==========================================================================
|
||||
*/
|
||||
static void
|
||||
void
|
||||
zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize,
|
||||
zio_transform_func_t *transform)
|
||||
{
|
||||
@ -347,7 +354,7 @@ zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize,
|
||||
zio->io_size = size;
|
||||
}
|
||||
|
||||
static void
|
||||
void
|
||||
zio_pop_transforms(zio_t *zio)
|
||||
{
|
||||
zio_transform_t *zt;
|
||||
@ -396,52 +403,39 @@ zio_decompress(zio_t *zio, void *data, uint64_t size)
* I/O parent/child relationships and pipeline interlocks
* ==========================================================================
*/
/*
* NOTE - Callers to zio_walk_parents() and zio_walk_children must
* continue calling these functions until they return NULL.
* Otherwise, the next caller will pick up the list walk in
* some indeterminate state. (Otherwise every caller would
* have to pass in a cookie to keep the state represented by
* io_walk_link, which gets annoying.)
*/
zio_t *
zio_walk_parents(zio_t *cio)
zio_walk_parents(zio_t *cio, zio_link_t **zl)
{
zio_link_t *zl = cio->io_walk_link;
list_t *pl = &cio->io_parent_list;

zl = (zl == NULL) ? list_head(pl) : list_next(pl, zl);
cio->io_walk_link = zl;

if (zl == NULL)
*zl = (*zl == NULL) ? list_head(pl) : list_next(pl, *zl);
if (*zl == NULL)
return (NULL);

ASSERT(zl->zl_child == cio);
return (zl->zl_parent);
ASSERT((*zl)->zl_child == cio);
return ((*zl)->zl_parent);
}

zio_t *
zio_walk_children(zio_t *pio)
zio_walk_children(zio_t *pio, zio_link_t **zl)
{
zio_link_t *zl = pio->io_walk_link;
list_t *cl = &pio->io_child_list;

zl = (zl == NULL) ? list_head(cl) : list_next(cl, zl);
pio->io_walk_link = zl;

if (zl == NULL)
*zl = (*zl == NULL) ? list_head(cl) : list_next(cl, *zl);
if (*zl == NULL)
return (NULL);

ASSERT(zl->zl_parent == pio);
return (zl->zl_child);
ASSERT((*zl)->zl_parent == pio);
return ((*zl)->zl_child);
}

zio_t *
zio_unique_parent(zio_t *cio)
{
zio_t *pio = zio_walk_parents(cio);
zio_link_t *zl = NULL;
zio_t *pio = zio_walk_parents(cio, &zl);

VERIFY(zio_walk_parents(cio) == NULL);
VERIFY3P(zio_walk_parents(cio, &zl), ==, NULL);
return (pio);
}
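A note on the API change above: the old walkers kept their position in io_walk_link on the zio itself, which is why the deleted comment had to warn every caller to run a walk to completion. The new signatures thread a zio_link_t * cursor owned by the caller instead, so two walks can be interleaved or abandoned midway. The following stand-alone sketch uses illustrative types and names only, not the ZFS structures, to show the cursor-owned iteration pattern the change adopts.

/*
 * Illustrative sketch only -- not ZFS code. node_t, link_t and
 * walk_parents() are invented stand-ins for zio_t, zio_link_t and
 * zio_walk_parents(); the caller owns the cursor, so no shared
 * walk state is left behind.
 */
#include <stddef.h>
#include <stdio.h>

typedef struct link {
	struct link *next;
	struct node *parent;
} link_t;

typedef struct node {
	const char *name;
	link_t *parents;	/* singly linked list of parent links */
} node_t;

/* Advance *cursor and return the next parent, or NULL at the end. */
static node_t *
walk_parents(node_t *n, link_t **cursor)
{
	*cursor = (*cursor == NULL) ? n->parents : (*cursor)->next;
	return (*cursor == NULL) ? NULL : (*cursor)->parent;
}

int
main(void)
{
	node_t p1 = { "p1", NULL }, p2 = { "p2", NULL };
	link_t l2 = { NULL, &p2 }, l1 = { &l2, &p1 };
	node_t child = { "child", &l1 };

	/* Each walk keeps its own cursor; neither disturbs the other. */
	link_t *a = NULL, *b = NULL;
	for (node_t *p = walk_parents(&child, &a); p != NULL;
	    p = walk_parents(&child, &a))
		printf("walk A sees %s\n", p->name);
	for (node_t *p = walk_parents(&child, &b); p != NULL;
	    p = walk_parents(&child, &b))
		printf("walk B sees %s\n", p->name);
	return (0);
}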
@ -510,6 +504,7 @@ zio_wait_for_children(zio_t *zio, enum zio_child child, enum zio_wait_type wait)
ASSERT(zio->io_stall == NULL);
if (*countp != 0) {
zio->io_stage >>= 1;
ASSERT3U(zio->io_stage, !=, ZIO_STAGE_OPEN);
zio->io_stall = countp;
waiting = B_TRUE;
}
@ -533,9 +528,18 @@ zio_notify_parent(zio_t *pio, zio_t *zio, enum zio_wait_type wait)
(*countp)--;

if (*countp == 0 && pio->io_stall == countp) {
zio_taskq_type_t type =
pio->io_stage < ZIO_STAGE_VDEV_IO_START ? ZIO_TASKQ_ISSUE :
ZIO_TASKQ_INTERRUPT;
pio->io_stall = NULL;
mutex_exit(&pio->io_lock);
zio_execute(pio);
/*
* Dispatch the parent zio in its own taskq so that
* the child can continue to make progress. This also
* prevents overflowing the stack when we have deeply nested
* parent-child relationships.
*/
zio_taskq_dispatch(pio, type, B_FALSE);
} else {
mutex_exit(&pio->io_lock);
}
@ -548,6 +552,30 @@ zio_inherit_child_errors(zio_t *zio, enum zio_child c)
zio->io_error = zio->io_child_error[c];
}

int
zio_timestamp_compare(const void *x1, const void *x2)
{
const zio_t *z1 = x1;
const zio_t *z2 = x2;

if (z1->io_queued_timestamp < z2->io_queued_timestamp)
return (-1);
if (z1->io_queued_timestamp > z2->io_queued_timestamp)
return (1);

if (z1->io_offset < z2->io_offset)
return (-1);
if (z1->io_offset > z2->io_offset)
return (1);

if (z1 < z2)
return (-1);
if (z1 > z2)
return (1);

return (0);
}
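The comparator introduced above gives the new allocation queue a strict total order: first by the time the zio was queued, then by offset, and finally by pointer identity so that two distinct zios never compare equal, which an AVL tree requires. A rough stand-alone analogue, with an invented record type and qsort standing in for the AVL consumer, is sketched below.

/* Illustrative analogue only; req_t and its fields are invented. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct req {
	uint64_t queued;	/* when the request was queued */
	uint64_t offset;	/* target offset, breaks timestamp ties */
} req_t;

static int
req_compare(const void *x1, const void *x2)
{
	const req_t *r1 = *(const req_t * const *)x1;
	const req_t *r2 = *(const req_t * const *)x2;

	if (r1->queued != r2->queued)
		return (r1->queued < r2->queued ? -1 : 1);
	if (r1->offset != r2->offset)
		return (r1->offset < r2->offset ? -1 : 1);
	/* Pointer identity as the final tie-breaker: a total order. */
	if (r1 != r2)
		return (r1 < r2 ? -1 : 1);
	return (0);
}

int
main(void)
{
	req_t a = { 10, 4096 }, b = { 10, 0 }, c = { 5, 8192 };
	req_t *q[] = { &a, &b, &c };

	qsort(q, 3, sizeof (req_t *), req_compare);
	for (int i = 0; i < 3; i++)
		printf("queued=%llu offset=%llu\n",
		    (unsigned long long)q[i]->queued,
		    (unsigned long long)q[i]->offset);
	return (0);
}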
/*
* ==========================================================================
* Create the various types of I/O (read, write, free, etc)
@ -616,6 +644,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
zio->io_orig_flags = zio->io_flags = flags;
|
||||
zio->io_orig_stage = zio->io_stage = stage;
|
||||
zio->io_orig_pipeline = zio->io_pipeline = pipeline;
|
||||
zio->io_pipeline_trace = ZIO_STAGE_OPEN;
|
||||
|
||||
zio->io_state[ZIO_WAIT_READY] = (stage >= ZIO_STAGE_READY);
|
||||
zio->io_state[ZIO_WAIT_DONE] = (stage >= ZIO_STAGE_DONE);
|
||||
@ -813,7 +842,7 @@ zio_rewrite(zio_t *pio, spa_t *spa, uint64_t txg, blkptr_t *bp, void *data,
|
||||
zio_t *zio;
|
||||
|
||||
zio = zio_create(pio, spa, txg, bp, data, size, done, private,
|
||||
ZIO_TYPE_WRITE, priority, flags, NULL, 0, zb,
|
||||
ZIO_TYPE_WRITE, priority, flags | ZIO_FLAG_IO_REWRITE, NULL, 0, zb,
|
||||
ZIO_STAGE_OPEN, ZIO_REWRITE_PIPELINE);
|
||||
|
||||
return (zio);
|
||||
@ -934,6 +963,7 @@ zio_claim(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
zio = zio_create(pio, spa, txg, bp, NULL, BP_GET_PSIZE(bp),
|
||||
done, private, ZIO_TYPE_CLAIM, ZIO_PRIORITY_NOW, flags,
|
||||
NULL, 0, NULL, ZIO_STAGE_OPEN, ZIO_CLAIM_PIPELINE);
|
||||
ASSERT0(zio->io_queued_timestamp);
|
||||
|
||||
return (zio);
|
||||
}
|
||||
@ -1022,8 +1052,8 @@ zio_write_phys(zio_t *pio, vdev_t *vd, uint64_t offset, uint64_t size,
|
||||
*/
|
||||
zio_t *
|
||||
zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
|
||||
void *data, uint64_t size, int type, zio_priority_t priority,
|
||||
enum zio_flag flags, zio_done_func_t *done, void *private)
|
||||
void *data, uint64_t size, int type, zio_priority_t priority,
|
||||
enum zio_flag flags, zio_done_func_t *done, void *private)
|
||||
{
|
||||
enum zio_stage pipeline = ZIO_VDEV_CHILD_PIPELINE;
|
||||
zio_t *zio;
|
||||
@ -1058,9 +1088,30 @@ zio_vdev_child_io(zio_t *pio, blkptr_t *bp, vdev_t *vd, uint64_t offset,
|
||||
if (flags & ZIO_FLAG_IO_REPAIR)
|
||||
flags &= ~ZIO_FLAG_SPECULATIVE;
|
||||
|
||||
/*
|
||||
* If we're creating a child I/O that is not associated with a
|
||||
* top-level vdev, then the child zio is not an allocating I/O.
|
||||
* If this is a retried I/O then we ignore it since we will
|
||||
* have already processed the original allocating I/O.
|
||||
*/
|
||||
if (flags & ZIO_FLAG_IO_ALLOCATING &&
|
||||
(vd != vd->vdev_top || (flags & ZIO_FLAG_IO_RETRY))) {
|
||||
metaslab_class_t *mc = spa_normal_class(pio->io_spa);
|
||||
|
||||
ASSERT(mc->mc_alloc_throttle_enabled);
|
||||
ASSERT(type == ZIO_TYPE_WRITE);
|
||||
ASSERT(priority == ZIO_PRIORITY_ASYNC_WRITE);
|
||||
ASSERT(!(flags & ZIO_FLAG_IO_REPAIR));
|
||||
ASSERT(!(pio->io_flags & ZIO_FLAG_IO_REWRITE) ||
|
||||
pio->io_child_type == ZIO_CHILD_GANG);
|
||||
|
||||
flags &= ~ZIO_FLAG_IO_ALLOCATING;
|
||||
}
|
||||
|
||||
zio = zio_create(pio, pio->io_spa, pio->io_txg, bp, data, size,
|
||||
done, private, type, priority, flags, vd, offset, &pio->io_bookmark,
|
||||
ZIO_STAGE_VDEV_IO_START >> 1, pipeline);
|
||||
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
|
||||
|
||||
zio->io_physdone = pio->io_physdone;
|
||||
if (vd->vdev_ops->vdev_op_leaf && zio->io_logical != NULL)
|
||||
@ -1166,6 +1217,65 @@ zio_read_bp_init(zio_t *zio)
|
||||
|
||||
static int
|
||||
zio_write_bp_init(zio_t *zio)
|
||||
{
|
||||
if (!IO_IS_ALLOCATING(zio))
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
|
||||
|
||||
if (zio->io_bp_override) {
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
zio_prop_t *zp = &zio->io_prop;
|
||||
|
||||
ASSERT(bp->blk_birth != zio->io_txg);
|
||||
ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
|
||||
|
||||
*bp = *zio->io_bp_override;
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
/*
|
||||
* If we've been overridden and nopwrite is set then
|
||||
* set the flag accordingly to indicate that a nopwrite
|
||||
* has already occurred.
|
||||
*/
|
||||
if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
|
||||
ASSERT(!zp->zp_dedup);
|
||||
ASSERT3U(BP_GET_CHECKSUM(bp), ==, zp->zp_checksum);
|
||||
zio->io_flags |= ZIO_FLAG_NOPWRITE;
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
ASSERT(!zp->zp_nopwrite);
|
||||
|
||||
if (BP_IS_HOLE(bp) || !zp->zp_dedup)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
|
||||
|
||||
if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
|
||||
BP_SET_DEDUP(bp, 1);
|
||||
zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* We were unable to handle this as an override bp, treat
|
||||
* it as a regular write I/O.
|
||||
*/
|
||||
zio->io_bp_override = NULL;
|
||||
*bp = zio->io_bp_orig;
|
||||
zio->io_pipeline = zio->io_orig_pipeline;
|
||||
}
|
||||
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
static int
|
||||
zio_write_compress(zio_t *zio)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
zio_prop_t *zp = &zio->io_prop;
|
||||
@ -1197,44 +1307,7 @@ zio_write_bp_init(zio_t *zio)
|
||||
}
|
||||
|
||||
ASSERT(zio->io_child_type != ZIO_CHILD_DDT);
|
||||
|
||||
if (zio->io_bp_override) {
|
||||
ASSERT(bp->blk_birth != zio->io_txg);
|
||||
ASSERT(BP_GET_DEDUP(zio->io_bp_override) == 0);
|
||||
|
||||
*bp = *zio->io_bp_override;
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
if (BP_IS_EMBEDDED(bp))
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
/*
|
||||
* If we've been overridden and nopwrite is set then
|
||||
* set the flag accordingly to indicate that a nopwrite
|
||||
* has already occurred.
|
||||
*/
|
||||
if (!BP_IS_HOLE(bp) && zp->zp_nopwrite) {
|
||||
ASSERT(!zp->zp_dedup);
|
||||
zio->io_flags |= ZIO_FLAG_NOPWRITE;
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
ASSERT(!zp->zp_nopwrite);
|
||||
|
||||
if (BP_IS_HOLE(bp) || !zp->zp_dedup)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
ASSERT((zio_checksum_table[zp->zp_checksum].ci_flags &
|
||||
ZCHECKSUM_FLAG_DEDUP) || zp->zp_dedup_verify);
|
||||
|
||||
if (BP_GET_CHECKSUM(bp) == zp->zp_checksum) {
|
||||
BP_SET_DEDUP(bp, 1);
|
||||
zio->io_pipeline |= ZIO_STAGE_DDT_WRITE;
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
zio->io_bp_override = NULL;
|
||||
BP_ZERO(bp);
|
||||
}
|
||||
ASSERT(zio->io_bp_override == NULL);
|
||||
|
||||
if (!BP_IS_HOLE(bp) && bp->blk_birth == zio->io_txg) {
|
||||
/*
|
||||
@ -1303,6 +1376,14 @@ zio_write_bp_init(zio_t *zio)
|
||||
psize, lsize, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We were unable to handle this as an override bp, treat
|
||||
* it as a regular write I/O.
|
||||
*/
|
||||
zio->io_bp_override = NULL;
|
||||
*bp = zio->io_bp_orig;
|
||||
zio->io_pipeline = zio->io_orig_pipeline;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1355,7 +1436,6 @@ zio_write_bp_init(zio_t *zio)
|
||||
zio->io_pipeline |= ZIO_STAGE_NOP_WRITE;
|
||||
}
|
||||
}
|
||||
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
@ -1532,6 +1612,8 @@ zio_execute(zio_t *zio)
|
||||
{
|
||||
zio->io_executor = curthread;
|
||||
|
||||
ASSERT3U(zio->io_queued_timestamp, >, 0);
|
||||
|
||||
while (zio->io_stage < ZIO_STAGE_DONE) {
|
||||
enum zio_stage pipeline = zio->io_pipeline;
|
||||
enum zio_stage stage = zio->io_stage;
|
||||
@ -1565,6 +1647,7 @@ zio_execute(zio_t *zio)
|
||||
}
|
||||
|
||||
zio->io_stage = stage;
|
||||
zio->io_pipeline_trace |= zio->io_stage;
|
||||
rv = zio_pipeline[highbit64(stage) - 1](zio);
|
||||
|
||||
if (rv == ZIO_PIPELINE_STOP)
|
||||
@ -1588,6 +1671,8 @@ zio_wait(zio_t *zio)
|
||||
ASSERT(zio->io_executor == NULL);
|
||||
|
||||
zio->io_waiter = curthread;
|
||||
ASSERT0(zio->io_queued_timestamp);
|
||||
zio->io_queued_timestamp = gethrtime();
|
||||
|
||||
zio_execute(zio);
|
||||
|
||||
@ -1619,6 +1704,8 @@ zio_nowait(zio_t *zio)
|
||||
zio_add_child(spa->spa_async_zio_root[CPU_SEQID], zio);
|
||||
}
|
||||
|
||||
ASSERT0(zio->io_queued_timestamp);
|
||||
zio->io_queued_timestamp = gethrtime();
|
||||
zio_execute(zio);
|
||||
}
|
||||
|
||||
@ -1643,6 +1730,7 @@ zio_reexecute(zio_t *pio)
|
||||
pio->io_pipeline = pio->io_orig_pipeline;
|
||||
pio->io_reexecute = 0;
|
||||
pio->io_flags |= ZIO_FLAG_REEXECUTED;
|
||||
pio->io_pipeline_trace = 0;
|
||||
pio->io_error = 0;
|
||||
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
|
||||
pio->io_state[w] = 0;
|
||||
@ -1659,8 +1747,9 @@ zio_reexecute(zio_t *pio)
|
||||
* the remainder of pio's io_child_list, from 'cio_next' onward,
|
||||
* cannot be affected by any side effects of reexecuting 'cio'.
|
||||
*/
|
||||
for (cio = zio_walk_children(pio); cio != NULL; cio = cio_next) {
|
||||
cio_next = zio_walk_children(pio);
|
||||
zio_link_t *zl = NULL;
|
||||
for (cio = zio_walk_children(pio, &zl); cio != NULL; cio = cio_next) {
|
||||
cio_next = zio_walk_children(pio, &zl);
|
||||
mutex_enter(&pio->io_lock);
|
||||
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
|
||||
pio->io_children[cio->io_child_type][w]++;
|
||||
@ -1673,8 +1762,10 @@ zio_reexecute(zio_t *pio)
|
||||
* We don't reexecute "The Godfather" I/O here as it's the
|
||||
* responsibility of the caller to wait on him.
|
||||
*/
|
||||
if (!(pio->io_flags & ZIO_FLAG_GODFATHER))
|
||||
if (!(pio->io_flags & ZIO_FLAG_GODFATHER)) {
|
||||
pio->io_queued_timestamp = gethrtime();
|
||||
zio_execute(pio);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -2068,6 +2159,7 @@ static int
|
||||
zio_write_gang_block(zio_t *pio)
|
||||
{
|
||||
spa_t *spa = pio->io_spa;
|
||||
metaslab_class_t *mc = spa_normal_class(spa);
|
||||
blkptr_t *bp = pio->io_bp;
|
||||
zio_t *gio = pio->io_gang_leader;
|
||||
zio_t *zio;
|
||||
@ -2081,10 +2173,43 @@ zio_write_gang_block(zio_t *pio)
|
||||
zio_prop_t zp;
|
||||
int error;
|
||||
|
||||
error = metaslab_alloc(spa, spa_normal_class(spa), SPA_GANGBLOCKSIZE,
|
||||
bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp,
|
||||
METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER);
|
||||
int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
|
||||
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
|
||||
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
|
||||
ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
|
||||
|
||||
flags |= METASLAB_ASYNC_ALLOC;
|
||||
VERIFY(refcount_held(&mc->mc_alloc_slots, pio));
|
||||
|
||||
/*
|
||||
* The logical zio has already placed a reservation for
|
||||
* 'copies' allocation slots but gang blocks may require
|
||||
* additional copies. These additional copies
|
||||
* (i.e. gbh_copies - copies) are guaranteed to succeed
|
||||
* since metaslab_class_throttle_reserve() always allows
|
||||
* additional reservations for gang blocks.
|
||||
*/
|
||||
VERIFY(metaslab_class_throttle_reserve(mc, gbh_copies - copies,
|
||||
pio, flags));
|
||||
}
|
||||
|
||||
error = metaslab_alloc(spa, mc, SPA_GANGBLOCKSIZE,
|
||||
bp, gbh_copies, txg, pio == gio ? NULL : gio->io_bp, flags, pio);
|
||||
if (error) {
|
||||
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
|
||||
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
|
||||
ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
|
||||
|
||||
/*
|
||||
* If we failed to allocate the gang block header then
|
||||
* we remove any additional allocation reservations that
|
||||
* we placed here. The original reservation will
|
||||
* be removed when the logical I/O goes to the ready
|
||||
* stage.
|
||||
*/
|
||||
metaslab_class_throttle_unreserve(mc,
|
||||
gbh_copies - copies, pio);
|
||||
}
|
||||
pio->io_error = error;
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
@ -2123,11 +2248,25 @@ zio_write_gang_block(zio_t *pio)
|
||||
zp.zp_dedup_verify = B_FALSE;
|
||||
zp.zp_nopwrite = B_FALSE;
|
||||
|
||||
zio_nowait(zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
|
||||
zio_t *cio = zio_write(zio, spa, txg, &gbh->zg_blkptr[g],
|
||||
(char *)pio->io_data + (pio->io_size - resid), lsize, &zp,
|
||||
zio_write_gang_member_ready, NULL, NULL, NULL,
|
||||
&gn->gn_child[g], pio->io_priority,
|
||||
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark));
|
||||
ZIO_GANG_CHILD_FLAGS(pio), &pio->io_bookmark);
|
||||
|
||||
if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
|
||||
ASSERT(pio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
|
||||
ASSERT(!(pio->io_flags & ZIO_FLAG_NODATA));
|
||||
|
||||
/*
|
||||
* Gang children won't throttle but we should
|
||||
* account for their work, so reserve an allocation
|
||||
* slot for them here.
|
||||
*/
|
||||
VERIFY(metaslab_class_throttle_reserve(mc,
|
||||
zp.zp_copies, cio, flags));
|
||||
}
|
||||
zio_nowait(cio);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2356,7 +2495,7 @@ zio_ddt_collision(zio_t *zio, ddt_t *ddt, ddt_entry_t *dde)
|
||||
bcmp(abuf->b_data, zio->io_orig_data,
|
||||
zio->io_orig_size) != 0)
|
||||
error = SET_ERROR(EEXIST);
|
||||
VERIFY(arc_buf_remove_ref(abuf, &abuf));
|
||||
arc_buf_destroy(abuf, &abuf);
|
||||
}
|
||||
|
||||
ddt_enter(ddt);
|
||||
@ -2385,7 +2524,8 @@ zio_ddt_child_write_ready(zio_t *zio)
|
||||
|
||||
ddt_phys_fill(ddp, zio->io_bp);
|
||||
|
||||
while ((pio = zio_walk_parents(zio)) != NULL)
|
||||
zio_link_t *zl = NULL;
|
||||
while ((pio = zio_walk_parents(zio, &zl)) != NULL)
|
||||
ddt_bp_fill(ddp, pio->io_bp, zio->io_txg);
|
||||
|
||||
ddt_exit(ddt);
|
||||
@ -2406,7 +2546,8 @@ zio_ddt_child_write_done(zio_t *zio)
|
||||
dde->dde_lead_zio[p] = NULL;
|
||||
|
||||
if (zio->io_error == 0) {
|
||||
while (zio_walk_parents(zio) != NULL)
|
||||
zio_link_t *zl = NULL;
|
||||
while (zio_walk_parents(zio, &zl) != NULL)
|
||||
ddt_phys_addref(ddp);
|
||||
} else {
|
||||
ddt_phys_clear(ddp);
|
||||
@ -2584,6 +2725,97 @@ zio_ddt_free(zio_t *zio)
|
||||
* Allocate and free blocks
|
||||
* ==========================================================================
|
||||
*/
|
||||
|
||||
static zio_t *
|
||||
zio_io_to_allocate(spa_t *spa)
|
||||
{
|
||||
zio_t *zio;
|
||||
|
||||
ASSERT(MUTEX_HELD(&spa->spa_alloc_lock));
|
||||
|
||||
zio = avl_first(&spa->spa_alloc_tree);
|
||||
if (zio == NULL)
|
||||
return (NULL);
|
||||
|
||||
ASSERT(IO_IS_ALLOCATING(zio));
|
||||
|
||||
/*
|
||||
* Try to place a reservation for this zio. If we're unable to
|
||||
* reserve then we throttle.
|
||||
*/
|
||||
if (!metaslab_class_throttle_reserve(spa_normal_class(spa),
|
||||
zio->io_prop.zp_copies, zio, 0)) {
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
avl_remove(&spa->spa_alloc_tree, zio);
|
||||
ASSERT3U(zio->io_stage, <, ZIO_STAGE_DVA_ALLOCATE);
|
||||
|
||||
return (zio);
|
||||
}
|
||||
|
||||
static int
|
||||
zio_dva_throttle(zio_t *zio)
|
||||
{
|
||||
spa_t *spa = zio->io_spa;
|
||||
zio_t *nio;
|
||||
|
||||
if (zio->io_priority == ZIO_PRIORITY_SYNC_WRITE ||
|
||||
!spa_normal_class(zio->io_spa)->mc_alloc_throttle_enabled ||
|
||||
zio->io_child_type == ZIO_CHILD_GANG ||
|
||||
zio->io_flags & ZIO_FLAG_NODATA) {
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
ASSERT(zio->io_child_type > ZIO_CHILD_GANG);
|
||||
|
||||
ASSERT3U(zio->io_queued_timestamp, >, 0);
|
||||
ASSERT(zio->io_stage == ZIO_STAGE_DVA_THROTTLE);
|
||||
|
||||
mutex_enter(&spa->spa_alloc_lock);
|
||||
|
||||
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
|
||||
avl_add(&spa->spa_alloc_tree, zio);
|
||||
|
||||
nio = zio_io_to_allocate(zio->io_spa);
|
||||
mutex_exit(&spa->spa_alloc_lock);
|
||||
|
||||
if (nio == zio)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
|
||||
if (nio != NULL) {
|
||||
ASSERT3U(nio->io_queued_timestamp, <=,
|
||||
zio->io_queued_timestamp);
|
||||
ASSERT(nio->io_stage == ZIO_STAGE_DVA_THROTTLE);
|
||||
/*
|
||||
* We are passing control to a new zio so make sure that
|
||||
* it is processed by a different thread. We do this to
|
||||
* avoid stack overflows that can occur when parents are
|
||||
* throttled and children are making progress. We allow
|
||||
* it to go to the head of the taskq since it's already
|
||||
* been waiting.
|
||||
*/
|
||||
zio_taskq_dispatch(nio, ZIO_TASKQ_ISSUE, B_TRUE);
|
||||
}
|
||||
return (ZIO_PIPELINE_STOP);
|
||||
}
|
||||
|
||||
void
|
||||
zio_allocate_dispatch(spa_t *spa)
|
||||
{
|
||||
zio_t *zio;
|
||||
|
||||
mutex_enter(&spa->spa_alloc_lock);
|
||||
zio = zio_io_to_allocate(spa);
|
||||
mutex_exit(&spa->spa_alloc_lock);
|
||||
if (zio == NULL)
|
||||
return;
|
||||
|
||||
ASSERT3U(zio->io_stage, ==, ZIO_STAGE_DVA_THROTTLE);
|
||||
ASSERT0(zio->io_error);
|
||||
zio_taskq_dispatch(zio, ZIO_TASKQ_ISSUE, B_TRUE);
|
||||
}
|
||||
|
||||
static int
|
||||
zio_dva_allocate(zio_t *zio)
|
||||
{
|
||||
@ -2604,18 +2836,20 @@ zio_dva_allocate(zio_t *zio)
|
||||
ASSERT3U(zio->io_prop.zp_copies, <=, spa_max_replication(spa));
|
||||
ASSERT3U(zio->io_size, ==, BP_GET_PSIZE(bp));
|
||||
|
||||
/*
|
||||
* The dump device does not support gang blocks so allocation on
|
||||
* behalf of the dump device (i.e. ZIO_FLAG_NODATA) must avoid
|
||||
* the "fast" gang feature.
|
||||
*/
|
||||
flags |= (zio->io_flags & ZIO_FLAG_NODATA) ? METASLAB_GANG_AVOID : 0;
|
||||
flags |= (zio->io_flags & ZIO_FLAG_GANG_CHILD) ?
|
||||
METASLAB_GANG_CHILD : 0;
|
||||
error = metaslab_alloc(spa, mc, zio->io_size, bp,
|
||||
zio->io_prop.zp_copies, zio->io_txg, NULL, flags);
|
||||
if (zio->io_flags & ZIO_FLAG_NODATA) {
|
||||
flags |= METASLAB_DONT_THROTTLE;
|
||||
}
|
||||
if (zio->io_flags & ZIO_FLAG_GANG_CHILD) {
|
||||
flags |= METASLAB_GANG_CHILD;
|
||||
}
|
||||
if (zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE) {
|
||||
flags |= METASLAB_ASYNC_ALLOC;
|
||||
}
|
||||
|
||||
if (error) {
|
||||
error = metaslab_alloc(spa, mc, zio->io_size, bp,
|
||||
zio->io_prop.zp_copies, zio->io_txg, NULL, flags, zio);
|
||||
|
||||
if (error != 0) {
|
||||
spa_dbgmsg(spa, "%s: metaslab allocation failure: zio %p, "
|
||||
"size %llu, error %d", spa_name(spa), zio, zio->io_size,
|
||||
error);
|
||||
@ -2680,21 +2914,14 @@ zio_alloc_zil(spa_t *spa, uint64_t txg, blkptr_t *new_bp, blkptr_t *old_bp,
|
||||
|
||||
ASSERT(txg > spa_syncing_txg(spa));
|
||||
|
||||
/*
|
||||
* ZIL blocks are always contiguous (i.e. not gang blocks) so we
|
||||
* set the METASLAB_GANG_AVOID flag so that they don't "fast gang"
|
||||
* when allocating them.
|
||||
*/
|
||||
if (use_slog) {
|
||||
error = metaslab_alloc(spa, spa_log_class(spa), size,
|
||||
new_bp, 1, txg, old_bp,
|
||||
METASLAB_HINTBP_AVOID | METASLAB_GANG_AVOID);
|
||||
new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, NULL);
|
||||
}
|
||||
|
||||
if (error) {
|
||||
error = metaslab_alloc(spa, spa_normal_class(spa), size,
|
||||
new_bp, 1, txg, old_bp,
|
||||
METASLAB_HINTBP_AVOID);
|
||||
new_bp, 1, txg, old_bp, METASLAB_HINTBP_AVOID, NULL);
|
||||
}
|
||||
|
||||
if (error == 0) {
|
||||
@ -2770,6 +2997,8 @@ zio_vdev_io_start(zio_t *zio)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
ASSERT3P(zio->io_logical, !=, zio);
|
||||
|
||||
/*
|
||||
* We keep track of time-sensitive I/Os so that the scan thread
|
||||
* can quickly react to certain workloads. In particular, we care
|
||||
@ -3188,6 +3417,7 @@ zio_ready(zio_t *zio)
|
||||
{
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
zio_t *pio, *pio_next;
|
||||
zio_link_t *zl = NULL;
|
||||
|
||||
if (zio_wait_for_children(zio, ZIO_CHILD_GANG, ZIO_WAIT_READY) ||
|
||||
zio_wait_for_children(zio, ZIO_CHILD_DDT, ZIO_WAIT_READY))
|
||||
@ -3205,12 +3435,26 @@ zio_ready(zio_t *zio)
|
||||
if (bp != NULL && bp != &zio->io_bp_copy)
|
||||
zio->io_bp_copy = *bp;
|
||||
|
||||
if (zio->io_error)
|
||||
if (zio->io_error != 0) {
|
||||
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;
|
||||
|
||||
if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
|
||||
ASSERT(IO_IS_ALLOCATING(zio));
|
||||
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
|
||||
/*
|
||||
* We were unable to allocate anything, unreserve and
|
||||
* issue the next I/O to allocate.
|
||||
*/
|
||||
metaslab_class_throttle_unreserve(
|
||||
spa_normal_class(zio->io_spa),
|
||||
zio->io_prop.zp_copies, zio);
|
||||
zio_allocate_dispatch(zio->io_spa);
|
||||
}
|
||||
}
|
||||
|
||||
mutex_enter(&zio->io_lock);
|
||||
zio->io_state[ZIO_WAIT_READY] = 1;
|
||||
pio = zio_walk_parents(zio);
|
||||
pio = zio_walk_parents(zio, &zl);
|
||||
mutex_exit(&zio->io_lock);
|
||||
|
||||
/*
|
||||
@ -3221,7 +3465,7 @@ zio_ready(zio_t *zio)
|
||||
* all parents must wait for us to be done before they can be done.
|
||||
*/
|
||||
for (; pio != NULL; pio = pio_next) {
|
||||
pio_next = zio_walk_parents(zio);
|
||||
pio_next = zio_walk_parents(zio, &zl);
|
||||
zio_notify_parent(pio, zio, ZIO_WAIT_READY);
|
||||
}
|
||||
|
||||
@ -3241,6 +3485,66 @@ zio_ready(zio_t *zio)
|
||||
return (ZIO_PIPELINE_CONTINUE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the allocation throttle accounting.
|
||||
*/
|
||||
static void
|
||||
zio_dva_throttle_done(zio_t *zio)
|
||||
{
|
||||
zio_t *lio = zio->io_logical;
|
||||
zio_t *pio = zio_unique_parent(zio);
|
||||
vdev_t *vd = zio->io_vd;
|
||||
int flags = METASLAB_ASYNC_ALLOC;
|
||||
|
||||
ASSERT3P(zio->io_bp, !=, NULL);
|
||||
ASSERT3U(zio->io_type, ==, ZIO_TYPE_WRITE);
|
||||
ASSERT3U(zio->io_priority, ==, ZIO_PRIORITY_ASYNC_WRITE);
|
||||
ASSERT3U(zio->io_child_type, ==, ZIO_CHILD_VDEV);
|
||||
ASSERT(vd != NULL);
|
||||
ASSERT3P(vd, ==, vd->vdev_top);
|
||||
ASSERT(!(zio->io_flags & (ZIO_FLAG_IO_REPAIR | ZIO_FLAG_IO_RETRY)));
|
||||
ASSERT(zio->io_flags & ZIO_FLAG_IO_ALLOCATING);
|
||||
ASSERT(!(lio->io_flags & ZIO_FLAG_IO_REWRITE));
|
||||
ASSERT(!(lio->io_orig_flags & ZIO_FLAG_NODATA));
|
||||
|
||||
/*
|
||||
* Parents of gang children can have two flavors -- ones that
|
||||
* allocated the gang header (will have ZIO_FLAG_IO_REWRITE set)
|
||||
* and ones that allocated the constituent blocks. The allocation
|
||||
* throttle needs to know the allocating parent zio so we must find
|
||||
* it here.
|
||||
*/
|
||||
if (pio->io_child_type == ZIO_CHILD_GANG) {
|
||||
/*
|
||||
* If our parent is a rewrite gang child then our grandparent
|
||||
* would have been the one that performed the allocation.
|
||||
*/
|
||||
if (pio->io_flags & ZIO_FLAG_IO_REWRITE)
|
||||
pio = zio_unique_parent(pio);
|
||||
flags |= METASLAB_GANG_CHILD;
|
||||
}
|
||||
|
||||
ASSERT(IO_IS_ALLOCATING(pio));
|
||||
ASSERT3P(zio, !=, zio->io_logical);
|
||||
ASSERT(zio->io_logical != NULL);
|
||||
ASSERT(!(zio->io_flags & ZIO_FLAG_IO_REPAIR));
|
||||
ASSERT0(zio->io_flags & ZIO_FLAG_NOPWRITE);
|
||||
|
||||
mutex_enter(&pio->io_lock);
|
||||
metaslab_group_alloc_decrement(zio->io_spa, vd->vdev_id, pio, flags);
|
||||
mutex_exit(&pio->io_lock);
|
||||
|
||||
metaslab_class_throttle_unreserve(spa_normal_class(zio->io_spa),
|
||||
1, pio);
|
||||
|
||||
/*
|
||||
* Call into the pipeline to see if there is more work that
|
||||
* needs to be done. If there is work to be done it will be
|
||||
* dispatched to another taskq thread.
|
||||
*/
|
||||
zio_allocate_dispatch(zio->io_spa);
|
||||
}
|
||||
|
||||
static int
|
||||
zio_done(zio_t *zio)
|
||||
{
|
||||
@ -3250,6 +3554,8 @@ zio_done(zio_t *zio)
|
||||
vdev_t *vd = zio->io_vd;
|
||||
uint64_t psize = zio->io_size;
|
||||
zio_t *pio, *pio_next;
|
||||
metaslab_class_t *mc = spa_normal_class(spa);
|
||||
zio_link_t *zl = NULL;
|
||||
|
||||
/*
|
||||
* If our children haven't all completed,
|
||||
@ -3261,6 +3567,30 @@ zio_done(zio_t *zio)
|
||||
zio_wait_for_children(zio, ZIO_CHILD_LOGICAL, ZIO_WAIT_DONE))
|
||||
return (ZIO_PIPELINE_STOP);
|
||||
|
||||
/*
|
||||
* If the allocation throttle is enabled, then update the accounting.
|
||||
* We only track child I/Os that are part of an allocating async
|
||||
* write. We must do this since the allocation is performed
|
||||
* by the logical I/O but the actual write is done by child I/Os.
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING &&
|
||||
zio->io_child_type == ZIO_CHILD_VDEV) {
|
||||
ASSERT(mc->mc_alloc_throttle_enabled);
|
||||
zio_dva_throttle_done(zio);
|
||||
}
|
||||
|
||||
/*
|
||||
* If the allocation throttle is enabled, verify that
|
||||
* we have decremented the refcounts for every I/O that was throttled.
|
||||
*/
|
||||
if (zio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
|
||||
ASSERT(zio->io_type == ZIO_TYPE_WRITE);
|
||||
ASSERT(zio->io_priority == ZIO_PRIORITY_ASYNC_WRITE);
|
||||
ASSERT(bp != NULL);
|
||||
metaslab_group_alloc_verify(spa, zio->io_bp, zio);
|
||||
VERIFY(refcount_not_held(&mc->mc_alloc_slots, zio));
|
||||
}
|
||||
|
||||
for (int c = 0; c < ZIO_CHILD_TYPES; c++)
|
||||
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
|
||||
ASSERT(zio->io_children[c][w] == 0);
|
||||
@ -3430,13 +3760,15 @@ zio_done(zio_t *zio)
|
||||
* trouble (e.g. suspended). This allows "The Godfather"
|
||||
* I/O to return status without blocking.
|
||||
*/
|
||||
for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) {
|
||||
zio_link_t *zl = zio->io_walk_link;
|
||||
pio_next = zio_walk_parents(zio);
|
||||
zl = NULL;
|
||||
for (pio = zio_walk_parents(zio, &zl); pio != NULL;
|
||||
pio = pio_next) {
|
||||
zio_link_t *remove_zl = zl;
|
||||
pio_next = zio_walk_parents(zio, &zl);
|
||||
|
||||
if ((pio->io_flags & ZIO_FLAG_GODFATHER) &&
|
||||
(zio->io_reexecute & ZIO_REEXECUTE_SUSPEND)) {
|
||||
zio_remove_child(pio, zio, zl);
|
||||
zio_remove_child(pio, zio, remove_zl);
|
||||
zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
|
||||
}
|
||||
}
|
||||
@ -3500,10 +3832,11 @@ zio_done(zio_t *zio)
|
||||
zio->io_state[ZIO_WAIT_DONE] = 1;
|
||||
mutex_exit(&zio->io_lock);
|
||||
|
||||
for (pio = zio_walk_parents(zio); pio != NULL; pio = pio_next) {
|
||||
zio_link_t *zl = zio->io_walk_link;
|
||||
pio_next = zio_walk_parents(zio);
|
||||
zio_remove_child(pio, zio, zl);
|
||||
zl = NULL;
|
||||
for (pio = zio_walk_parents(zio, &zl); pio != NULL; pio = pio_next) {
|
||||
zio_link_t *remove_zl = zl;
|
||||
pio_next = zio_walk_parents(zio, &zl);
|
||||
zio_remove_child(pio, zio, remove_zl);
|
||||
zio_notify_parent(pio, zio, ZIO_WAIT_DONE);
|
||||
}
|
||||
|
||||
@ -3527,9 +3860,10 @@ zio_done(zio_t *zio)
|
||||
static zio_pipe_stage_t *zio_pipeline[] = {
|
||||
NULL,
|
||||
zio_read_bp_init,
|
||||
zio_write_bp_init,
|
||||
zio_free_bp_init,
|
||||
zio_issue_async,
|
||||
zio_write_bp_init,
|
||||
zio_write_compress,
|
||||
zio_checksum_generate,
|
||||
zio_nop_write,
|
||||
zio_ddt_read_start,
|
||||
@ -3538,6 +3872,7 @@ static zio_pipe_stage_t *zio_pipeline[] = {
|
||||
zio_ddt_free,
|
||||
zio_gang_assemble,
|
||||
zio_gang_issue,
|
||||
zio_dva_throttle,
|
||||
zio_dva_allocate,
|
||||
zio_dva_free,
|
||||
zio_dva_claim,
|
||||
|
@ -297,20 +297,12 @@ zio_checksum_compute(zio_t *zio, enum zio_checksum checksum,
|
||||
}
|
||||
|
||||
int
|
||||
zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
|
||||
zio_checksum_error_impl(spa_t *spa, blkptr_t *bp, enum zio_checksum checksum,
|
||||
void *data, uint64_t size, uint64_t offset, zio_bad_cksum_t *info)
|
||||
{
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
|
||||
(BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
|
||||
int byteswap;
|
||||
int error;
|
||||
uint64_t size = (bp == NULL ? zio->io_size :
|
||||
(BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
|
||||
uint64_t offset = zio->io_offset;
|
||||
void *data = zio->io_data;
|
||||
zio_checksum_info_t *ci = &zio_checksum_table[checksum];
|
||||
zio_cksum_t actual_cksum, expected_cksum, verifier;
|
||||
spa_t *spa = zio->io_spa;
|
||||
zio_cksum_t actual_cksum, expected_cksum;
|
||||
int byteswap;
|
||||
|
||||
if (checksum >= ZIO_CHECKSUM_FUNCTIONS || ci->ci_func[0] == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
@ -319,6 +311,7 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
|
||||
|
||||
if (ci->ci_flags & ZCHECKSUM_FLAG_EMBEDDED) {
|
||||
zio_eck_t *eck;
|
||||
zio_cksum_t verifier;
|
||||
|
||||
if (checksum == ZIO_CHECKSUM_ZILOG2) {
|
||||
zil_chain_t *zilc = data;
|
||||
@ -358,35 +351,54 @@ zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
|
||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||
eck->zec_cksum = expected_cksum;
|
||||
|
||||
if (byteswap)
|
||||
if (byteswap) {
|
||||
byteswap_uint64_array(&expected_cksum,
|
||||
sizeof (zio_cksum_t));
|
||||
}
|
||||
} else {
|
||||
ASSERT(!BP_IS_GANG(bp));
|
||||
byteswap = BP_SHOULD_BYTESWAP(bp);
|
||||
expected_cksum = bp->blk_cksum;
|
||||
ci->ci_func[byteswap](data, size,
|
||||
spa->spa_cksum_tmpls[checksum], &actual_cksum);
|
||||
}
|
||||
|
||||
info->zbc_expected = expected_cksum;
|
||||
info->zbc_actual = actual_cksum;
|
||||
info->zbc_checksum_name = ci->ci_name;
|
||||
info->zbc_byteswapped = byteswap;
|
||||
info->zbc_injected = 0;
|
||||
info->zbc_has_cksum = 1;
|
||||
if (info != NULL) {
|
||||
info->zbc_expected = expected_cksum;
|
||||
info->zbc_actual = actual_cksum;
|
||||
info->zbc_checksum_name = ci->ci_name;
|
||||
info->zbc_byteswapped = byteswap;
|
||||
info->zbc_injected = 0;
|
||||
info->zbc_has_cksum = 1;
|
||||
}
|
||||
|
||||
if (!ZIO_CHECKSUM_EQUAL(actual_cksum, expected_cksum))
|
||||
return (SET_ERROR(ECKSUM));
|
||||
|
||||
if (zio_injection_enabled && !zio->io_error &&
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
zio_checksum_error(zio_t *zio, zio_bad_cksum_t *info)
|
||||
{
|
||||
blkptr_t *bp = zio->io_bp;
|
||||
uint_t checksum = (bp == NULL ? zio->io_prop.zp_checksum :
|
||||
(BP_IS_GANG(bp) ? ZIO_CHECKSUM_GANG_HEADER : BP_GET_CHECKSUM(bp)));
|
||||
int error;
|
||||
uint64_t size = (bp == NULL ? zio->io_size :
|
||||
(BP_IS_GANG(bp) ? SPA_GANGBLOCKSIZE : BP_GET_PSIZE(bp)));
|
||||
uint64_t offset = zio->io_offset;
|
||||
void *data = zio->io_data;
|
||||
spa_t *spa = zio->io_spa;
|
||||
|
||||
error = zio_checksum_error_impl(spa, bp, checksum, data, size,
|
||||
offset, info);
|
||||
if (error != 0 && zio_injection_enabled && !zio->io_error &&
|
||||
(error = zio_handle_fault_injection(zio, ECKSUM)) != 0) {
|
||||
|
||||
info->zbc_injected = 1;
|
||||
return (error);
|
||||
}
|
||||
|
||||
return (0);
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -903,7 +903,8 @@ typedef enum {
SPA_LOAD_IMPORT, /* import in progress */
SPA_LOAD_TRYIMPORT, /* tryimport in progress */
SPA_LOAD_RECOVER, /* recovery requested */
SPA_LOAD_ERROR /* load failed */
SPA_LOAD_ERROR, /* load failed */
SPA_LOAD_CREATE /* creation in progress */
} spa_load_state_t;

/*
@ -103,8 +103,16 @@ __FBSDID("$FreeBSD$");

#include <dev/drm2/drm_os_freebsd.h>

#define __OS_HAS_AGP (defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE)))
#define __OS_HAS_MTRR (defined(CONFIG_MTRR))
#if defined(CONFIG_AGP) || (defined(CONFIG_AGP_MODULE) && defined(MODULE))
#define __OS_HAS_AGP 1
#else
#define __OS_HAS_AGP 0
#endif
#if defined(CONFIG_MTRR)
#define __OS_HAS_MTRR 1
#else
#define __OS_HAS_MTRR 0
#endif

struct drm_file;
struct drm_device;
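The drm change above replaces macros whose replacement text contained defined(), behavior that is undefined when the macro is later used in an #if and that newer compilers warn about, with macros that always expand to a plain 0 or 1. A minimal illustration of the resulting usage pattern follows; CONFIG_AGP and the feature name are stand-ins, not the driver's real configuration knobs.

/* Illustrative only; CONFIG_AGP is a stand-in configuration macro. */
#include <stdio.h>

#if defined(CONFIG_AGP)
#define	HAS_AGP	1
#else
#define	HAS_AGP	0
#endif

int
main(void)
{
#if HAS_AGP		/* always expands to 0 or 1, no defined() involved */
	printf("AGP support compiled in\n");
#else
	printf("AGP support compiled out\n");
#endif
	return (0);
}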
@ -57,13 +57,13 @@ __FBSDID("$FreeBSD$");
* Wait for the peripherial up to 40ms
*/
static int
do_1284_wait(device_t bus, char mask, char status)
do_1284_wait(device_t bus, uint8_t mask, uint8_t status)
{
return (ppb_poll_bus(bus, 4, mask, status, PPB_NOINTR | PPB_POLL));
}

static int
do_peripheral_wait(device_t bus, char mask, char status)
do_peripheral_wait(device_t bus, uint8_t mask, uint8_t status)
{
return (ppb_poll_bus(bus, 100, mask, status, PPB_NOINTR | PPB_POLL));
}
@ -54,11 +54,11 @@ MODULE_VERSION(ppbus, 1);
*/
int
ppb_poll_bus(device_t bus, int max,
char mask, char status, int how)
uint8_t mask, uint8_t status, int how)
{
struct ppb_data *ppb = DEVTOSOFTC(bus);
int i, j, error;
char r;
uint8_t r;

ppb_assert_locked(bus);

@ -186,7 +186,7 @@ ppb_ecp_sync(device_t bus)
int
ppb_get_status(device_t bus, struct ppb_status *status)
{
register char r;
uint8_t r;

ppb_assert_locked(bus);
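The char-to-uint8_t conversion in the ppbus code matters because plain char may be signed: a status byte with the high bit set sign-extends when promoted, so direct comparisons against constants such as 0x90 can fail even though the hardware bits match. A small stand-alone demonstration of that pitfall follows; the register value is made up, and on targets where char is unsigned the first comparison happens to succeed as well.

/* Illustrative only; the register value and names are invented. */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	char creg = (char)0x90;		/* plain char: often signed */
	uint8_t ureg = 0x90;

	/* With signed char, creg is -112, so this comparison is false. */
	printf("char    r == 0x90 -> %d\n", creg == 0x90);
	/* With uint8_t the byte compares as the driver expects. */
	printf("uint8_t r == 0x90 -> %d\n", ureg == 0x90);
	return (0);
}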
@ -263,7 +263,7 @@ extern void _ppb_assert_locked(device_t, const char *, int);
extern void ppb_init_callout(device_t, struct callout *, int);
extern int ppb_sleep(device_t, void *, int, const char *, int);
extern int ppb_get_status(device_t, struct ppb_status *);
extern int ppb_poll_bus(device_t, int, char, char, int);
extern int ppb_poll_bus(device_t, int, uint8_t, uint8_t, int);
extern int ppb_reset_epp_timeout(device_t);
extern int ppb_ecp_sync(device_t);
extern int ppb_get_epp_protocol(device_t);
@ -367,7 +367,7 @@ mpc85xx_map_dcsr(void)
err = fdt_get_range(node, 0, &b, &s);

if (err != 0)
return (err);
return (0);

law_enable(OCP85XX_TGTIF_DCSR, b, 0x400000);
return pmap_early_io_map(b, 0x400000);
@ -663,7 +663,7 @@ static void
cpu_idle_booke(sbintime_t sbt)
{

#ifdef E500
#ifdef BOOKE_E500
platform_cpu_idle(PCPU_GET(cpuid));
#endif
}
32
tools/tools/crypto/cryptorun.sh
Executable file
@ -0,0 +1,32 @@
#!/bin/sh
#
# A simple test runner for cryptotest
#
# Although cryptotest itself has a -z mode to test all algorithms at
# a variety of sizes, this script allows us to be more selective.
# Threads and buffer sizes move in powers of two from 1, for threads,
# and 256 for buffer sizes.
#
# e.g. cryptorun.sh aes 4 512
#
# Test aes with 1, 2 and 4 processes, and at sizes of 256 and 512 bytes.
#
# $FreeBSD$
#

threads=1
size=256
iterations=1000000
crypto="/tank/users/gnn/Repos/svn/FreeBSD.HEAD/tools/tools/crypto/cryptotest"
max_threads=$2
max_size=$3

while [ "$threads" -le "$max_threads" ]; do
	echo "Testing with $threads processes."
	while [ "$size" -le "$max_size" ]; do
		$crypto -t $threads -a $1 $iterations $size
		size=$(($size * 2))
	done
	size=256
	threads=$(($threads * 2))
done
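As the header comment explains, the runner sweeps process counts and buffer sizes upward in powers of two until it reaches the limits given on the command line. A hypothetical invocation (assuming cryptotest has been built and the crypto variable above points at it) might be:

	# exercise AES-CBC with 1, 2, 4 and 8 processes at 256, 512 and 1024 bytes
	sh cryptorun.sh aes 8 1024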
@ -84,6 +84,7 @@
 */

#include <sys/param.h>
#include <sys/cpuset.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
@ -96,6 +97,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#include <crypto/cryptodev.h>
@ -130,9 +132,6 @@ struct alg {
	{ "aes", 0, 16, 16, 16, CRYPTO_AES_CBC},
	{ "aes192", 0, 16, 24, 24, CRYPTO_AES_CBC},
	{ "aes256", 0, 16, 32, 32, CRYPTO_AES_CBC},
#ifdef notdef
	{ "arc4", 0, 8, 1, 32, CRYPTO_ARC4 },
#endif
	{ "md5", 1, 8, 16, 16, CRYPTO_MD5_HMAC },
	{ "sha1", 1, 8, 20, 20, CRYPTO_SHA1_HMAC },
	{ "sha256", 1, 8, 32, 32, CRYPTO_SHA2_256_HMAC },
@ -146,8 +145,8 @@ usage(const char* cmd)
	printf("usage: %s [-czsbv] [-d dev] [-a algorithm] [count] [size ...]\n",
	    cmd);
	printf("where algorithm is one of:\n");
	printf(" des 3des (default) blowfish cast skipjack rij\n");
	printf(" aes aes192 aes256 arc4\n");
	printf(" null des 3des (default) blowfish cast skipjack rij\n");
	printf(" aes aes192 aes256 md5 sha1 sha256 sha384 sha512\n");
	printf("count is the number of encrypt/decrypt ops to do\n");
	printf("size is the number of bytes of text to encrypt+decrypt\n");
	printf("\n");
@ -158,6 +157,7 @@ usage(const char* cmd)
	printf("-v be verbose\n");
	printf("-b mark operations for batching\n");
	printf("-p profile kernel crypto operation (must be root)\n");
	printf("-t n for n threads and run tests concurrently\n");
	exit(-1);
}
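Combining the usage string with the -t option shown above, a hypothetical cryptotest run (the algorithm, count, and size here are illustrative, not taken from this change) could look like:

	# 100000 AES-CBC encrypt/decrypt operations on 1024-byte buffers, 4 concurrent processes
	./cryptotest -a aes -t 4 100000 1024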
@ -469,6 +469,11 @@ runtests(struct alg *alg, int count, int size, u_long cmd, int threads, int prof
	if (threads > 1) {
		for (i = 0; i < threads; i++)
			if (fork() == 0) {
				cpuset_t mask;
				CPU_ZERO(&mask);
				CPU_SET(i, &mask);
				cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID,
				    -1, sizeof(mask), &mask);
				runtest(alg, count, size, cmd, &tvp[i]);
				exit(0);
			}
@ -483,17 +488,10 @@ runtests(struct alg *alg, int count, int size, u_long cmd, int threads, int prof
	if (t) {
		int nops = alg->ishash ? count : 2*count;

#if 0
		t /= threads;
		printf("%6.3lf sec, %7d %6s crypts, %7d bytes, %8.0lf byte/sec, %7.1lf Mb/sec\n",
		    t, nops, alg->name, size, (double)nops*size / t,
		    (double)nops*size / t * 8 / 1024 / 1024);
#else
		nops *= threads;
		printf("%8.3lf sec, %7d %6s crypts, %7d bytes, %8.0lf byte/sec, %7.1lf Mb/sec\n",
		    t, nops, alg->name, size, (double)nops*size / t,
		    (double)nops*size / t * 8 / 1024 / 1024);
#endif
	}
#ifdef __FreeBSD__
	if (profile) {
@ -581,6 +579,9 @@ main(int argc, char **argv)
		}
		argc--, argv++;
	}
	if (maxthreads > CPU_SETSIZE)
		errx(EX_USAGE, "Too many threads, %d, choose fewer.", maxthreads);

	if (nsizes == 0) {
		if (alg)
			sizes[nsizes++] = alg->blocksize;
@ -16,6 +16,7 @@ DIRDEPS = \
	lib/libopenbsd \
	lib/librpcsvc \
	lib/libutil \
	usr.bin/yacc.host \

.include <dirdeps.mk>