5027 zfs large block support
Reviewed by: Alek Pinchuk <pinchuk.alek@gmail.com> Reviewed by: George Wilson <george.wilson@delphix.com> Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com> Reviewed by: Richard Elling <richard.elling@richardelling.com> Reviewed by: Saso Kiselkov <skiselkov.ml@gmail.com> Reviewed by: Brian Behlendorf <behlendorf1@llnl.gov> Approved by: Dan McDonald <danmcd@omniti.com> Author: Matthew Ahrens <matt@mahrens.org> illumos/illumos-gate@b515258426
This commit is contained in:
parent
6c426f3bd4
commit
0945ce02d2
@ -2120,6 +2120,8 @@ dump_label(const char *dev)
|
||||
(void) close(fd);
|
||||
}
|
||||
|
||||
static uint64_t num_large_blocks;
|
||||
|
||||
/*ARGSUSED*/
|
||||
static int
|
||||
dump_one_dir(const char *dsname, void *arg)
|
||||
@ -2132,6 +2134,8 @@ dump_one_dir(const char *dsname, void *arg)
|
||||
(void) printf("Could not open %s, error %d\n", dsname, error);
|
||||
return (0);
|
||||
}
|
||||
if (dmu_objset_ds(os)->ds_large_blocks)
|
||||
num_large_blocks++;
|
||||
dump_dir(os);
|
||||
dmu_objset_disown(os, FTAG);
|
||||
fuid_table_destroy();
|
||||
@ -2142,7 +2146,7 @@ dump_one_dir(const char *dsname, void *arg)
|
||||
/*
|
||||
* Block statistics.
|
||||
*/
|
||||
#define PSIZE_HISTO_SIZE (SPA_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1)
|
||||
#define PSIZE_HISTO_SIZE (SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 2)
|
||||
typedef struct zdb_blkstats {
|
||||
uint64_t zb_asize;
|
||||
uint64_t zb_lsize;
|
||||
@ -2207,7 +2211,15 @@ zdb_count_block(zdb_cb_t *zcb, zilog_t *zilog, const blkptr_t *bp,
|
||||
zb->zb_lsize += BP_GET_LSIZE(bp);
|
||||
zb->zb_psize += BP_GET_PSIZE(bp);
|
||||
zb->zb_count++;
|
||||
zb->zb_psize_histogram[BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT]++;
|
||||
|
||||
/*
|
||||
* The histogram is only big enough to record blocks up to
|
||||
* SPA_OLD_MAXBLOCKSIZE; larger blocks go into the last,
|
||||
* "other", bucket.
|
||||
*/
|
||||
int idx = BP_GET_PSIZE(bp) >> SPA_MINBLOCKSHIFT;
|
||||
idx = MIN(idx, SPA_OLD_MAXBLOCKSIZE / SPA_MINBLOCKSIZE + 1);
|
||||
zb->zb_psize_histogram[idx]++;
|
||||
|
||||
zb->zb_gangs += BP_COUNT_GANG(bp);
|
||||
|
||||
@ -2917,6 +2929,7 @@ dump_zpool(spa_t *spa)
|
||||
dump_metaslab_groups(spa);
|
||||
|
||||
if (dump_opt['d'] || dump_opt['i']) {
|
||||
uint64_t refcount;
|
||||
dump_dir(dp->dp_meta_objset);
|
||||
if (dump_opt['d'] >= 3) {
|
||||
dump_bpobj(&spa->spa_deferred_bpobj,
|
||||
@ -2936,8 +2949,21 @@ dump_zpool(spa_t *spa)
|
||||
}
|
||||
(void) dmu_objset_find(spa_name(spa), dump_one_dir,
|
||||
NULL, DS_FIND_SNAPSHOTS | DS_FIND_CHILDREN);
|
||||
|
||||
(void) feature_get_refcount(spa,
|
||||
&spa_feature_table[SPA_FEATURE_LARGE_BLOCKS], &refcount);
|
||||
if (num_large_blocks != refcount) {
|
||||
(void) printf("large_blocks feature refcount mismatch: "
|
||||
"expected %lld != actual %lld\n",
|
||||
(longlong_t)num_large_blocks,
|
||||
(longlong_t)refcount);
|
||||
rc = 2;
|
||||
} else {
|
||||
(void) printf("Verified large_blocks feature refcount "
|
||||
"is correct (%llu)\n", (longlong_t)refcount);
|
||||
}
|
||||
}
|
||||
if (dump_opt['b'] || dump_opt['c'])
|
||||
if (rc == 0 && (dump_opt['b'] || dump_opt['c']))
|
||||
rc = dump_block_stats(spa);
|
||||
|
||||
if (rc == 0)
|
||||
|
@ -256,9 +256,9 @@ get_usage(zfs_help_t idx)
|
||||
case HELP_ROLLBACK:
|
||||
return (gettext("\trollback [-rRf] <snapshot>\n"));
|
||||
case HELP_SEND:
|
||||
return (gettext("\tsend [-DnPpRve] [-[iI] snapshot] "
|
||||
return (gettext("\tsend [-DnPpRvLe] [-[iI] snapshot] "
|
||||
"<snapshot>\n"
|
||||
"\tsend [-e] [-i snapshot|bookmark] "
|
||||
"\tsend [-Le] [-i snapshot|bookmark] "
|
||||
"<filesystem|volume|snapshot>\n"));
|
||||
case HELP_SET:
|
||||
return (gettext("\tset <property=value> "
|
||||
@ -3640,7 +3640,7 @@ zfs_do_send(int argc, char **argv)
|
||||
boolean_t extraverbose = B_FALSE;
|
||||
|
||||
/* check options */
|
||||
while ((c = getopt(argc, argv, ":i:I:RDpvnPe")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":i:I:RDpvnPLe")) != -1) {
|
||||
switch (c) {
|
||||
case 'i':
|
||||
if (fromname)
|
||||
@ -3675,6 +3675,9 @@ zfs_do_send(int argc, char **argv)
|
||||
case 'n':
|
||||
flags.dryrun = B_TRUE;
|
||||
break;
|
||||
case 'L':
|
||||
flags.largeblock = B_TRUE;
|
||||
break;
|
||||
case 'e':
|
||||
flags.embed_data = B_TRUE;
|
||||
break;
|
||||
@ -3731,6 +3734,8 @@ zfs_do_send(int argc, char **argv)
|
||||
if (zhp == NULL)
|
||||
return (1);
|
||||
|
||||
if (flags.largeblock)
|
||||
lzc_flags |= LZC_SEND_FLAG_LARGE_BLOCK;
|
||||
if (flags.embed_data)
|
||||
lzc_flags |= LZC_SEND_FLAG_EMBED_DATA;
|
||||
|
||||
|
@ -54,7 +54,6 @@ uint64_t total_stream_len = 0;
|
||||
FILE *send_stream = 0;
|
||||
boolean_t do_byteswap = B_FALSE;
|
||||
boolean_t do_cksum = B_TRUE;
|
||||
#define INITIAL_BUFLEN (1<<20)
|
||||
|
||||
static void
|
||||
usage(void)
|
||||
@ -67,6 +66,18 @@ usage(void)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
 * safe_malloc - allocate "size" bytes with malloc(), never returning NULL.
 *
 * If malloc() fails, a message naming the requested size is written to
 * stderr and the process is terminated with abort(); otherwise the newly
 * allocated pointer is returned to the caller.
 */
static void *
|
||||
safe_malloc(size_t size)
|
||||
{
|
||||
void *rv = malloc(size);
|
||||
if (rv == NULL) {
|
||||
(void) fprintf(stderr, "ERROR; failed to allocate %zu bytes\n",
|
||||
size);
|
||||
abort();
|
||||
}
|
||||
return (rv);
|
||||
}
|
||||
|
||||
/*
|
||||
* ssread - send stream read.
|
||||
*
|
||||
@ -158,7 +169,7 @@ print_block(char *buf, int length)
|
||||
int
|
||||
main(int argc, char *argv[])
|
||||
{
|
||||
char *buf = malloc(INITIAL_BUFLEN);
|
||||
char *buf = safe_malloc(SPA_MAXBLOCKSIZE);
|
||||
uint64_t drr_record_count[DRR_NUMTYPES] = { 0 };
|
||||
uint64_t total_records = 0;
|
||||
dmu_replay_record_t thedrr;
|
||||
@ -307,9 +318,9 @@ main(int argc, char *argv[])
|
||||
nvlist_t *nv;
|
||||
int sz = drr->drr_payloadlen;
|
||||
|
||||
if (sz > INITIAL_BUFLEN) {
|
||||
if (sz > SPA_MAXBLOCKSIZE) {
|
||||
free(buf);
|
||||
buf = malloc(sz);
|
||||
buf = safe_malloc(sz);
|
||||
}
|
||||
(void) ssread(buf, sz, &zc);
|
||||
if (ferror(send_stream))
|
||||
|
@ -985,9 +985,15 @@ ztest_spa_get_ashift() {
|
||||
static int
|
||||
ztest_random_blocksize(void)
|
||||
{
|
||||
// Choose a block size >= the ashift.
|
||||
uint64_t block_shift =
|
||||
ztest_random(SPA_MAXBLOCKSHIFT - ztest_spa_get_ashift() + 1);
|
||||
uint64_t block_shift;
|
||||
/*
|
||||
* Choose a block size >= the ashift.
|
||||
* If the SPA supports new MAXBLOCKSIZE, test up to 1MB blocks.
|
||||
*/
|
||||
int maxbs = SPA_OLD_MAXBLOCKSHIFT;
|
||||
if (spa_maxblocksize(ztest_spa) == SPA_MAXBLOCKSIZE)
|
||||
maxbs = 20;
|
||||
block_shift = ztest_random(maxbs - ztest_spa_get_ashift() + 1);
|
||||
return (1 << (SPA_MINBLOCKSHIFT + block_shift));
|
||||
}
|
||||
|
||||
@ -4787,7 +4793,7 @@ ztest_fault_inject(ztest_ds_t *zd, uint64_t id)
|
||||
char path0[MAXPATHLEN];
|
||||
char pathrand[MAXPATHLEN];
|
||||
size_t fsize;
|
||||
int bshift = SPA_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
|
||||
int bshift = SPA_OLD_MAXBLOCKSHIFT + 2; /* don't scrog all labels */
|
||||
int iters = 1000;
|
||||
int maxfaults;
|
||||
int mirror_save;
|
||||
|
@ -57,7 +57,8 @@ valid_char(char c, boolean_t after_colon)
|
||||
{
|
||||
return ((c >= 'a' && c <= 'z') ||
|
||||
(c >= '0' && c <= '9') ||
|
||||
c == (after_colon ? '_' : '.'));
|
||||
(after_colon && c == '_') ||
|
||||
(!after_colon && (c == '.' || c == '-')));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -221,4 +222,13 @@ zpool_feature_init(void)
|
||||
"com.delphix:embedded_data", "embedded_data",
|
||||
"Blocks which compress very well use even less space.",
|
||||
B_FALSE, B_TRUE, B_TRUE, NULL);
|
||||
|
||||
static const spa_feature_t large_blocks_deps[] = {
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
SPA_FEATURE_NONE
|
||||
};
|
||||
zfeature_register(SPA_FEATURE_LARGE_BLOCKS,
|
||||
"org.open-zfs:large_blocks", "large_blocks",
|
||||
"Support for blocks larger than 128KB.", B_FALSE, B_FALSE, B_FALSE,
|
||||
large_blocks_deps);
|
||||
}
|
||||
|
@ -51,6 +51,7 @@ typedef enum spa_feature {
|
||||
SPA_FEATURE_EMBEDDED_DATA,
|
||||
SPA_FEATURE_BOOKMARKS,
|
||||
SPA_FEATURE_FS_SS_LIMIT,
|
||||
SPA_FEATURE_LARGE_BLOCKS,
|
||||
SPA_FEATURES
|
||||
} spa_feature_t;
|
||||
|
||||
|
@ -397,8 +397,8 @@ zfs_prop_init(void)
|
||||
|
||||
/* inherit number properties */
|
||||
zprop_register_number(ZFS_PROP_RECORDSIZE, "recordsize",
|
||||
SPA_MAXBLOCKSIZE, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "512 to 128k, power of 2", "RECSIZE");
|
||||
SPA_OLD_MAXBLOCKSIZE, PROP_INHERIT,
|
||||
ZFS_TYPE_FILESYSTEM, "512 to 1M, power of 2", "RECSIZE");
|
||||
|
||||
/* hidden properties */
|
||||
zprop_register_hidden(ZFS_PROP_CREATETXG, "createtxg", PROP_TYPE_NUMBER,
|
||||
|
@ -127,6 +127,8 @@ zpool_prop_init(void)
|
||||
/* hidden properties */
|
||||
zprop_register_hidden(ZPOOL_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
PROP_READONLY, ZFS_TYPE_POOL, "NAME");
|
||||
zprop_register_hidden(ZPOOL_PROP_MAXBLOCKSIZE, "maxblocksize",
|
||||
PROP_TYPE_NUMBER, PROP_READONLY, ZFS_TYPE_POOL, "MAXBLOCKSIZE");
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -591,6 +591,9 @@ typedef struct sendflags {
|
||||
/* show progress (ie. -v) */
|
||||
boolean_t progress;
|
||||
|
||||
/* large blocks (>128K) are permitted */
|
||||
boolean_t largeblock;
|
||||
|
||||
/* WRITE_EMBEDDED records of type DATA are permitted */
|
||||
boolean_t embed_data;
|
||||
} sendflags_t;
|
||||
|
@ -1048,21 +1048,36 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
|
||||
break;
|
||||
}
|
||||
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
case ZFS_PROP_VOLBLOCKSIZE:
|
||||
/* must be power of two within SPA_{MIN,MAX}BLOCKSIZE */
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
{
|
||||
int maxbs = SPA_MAXBLOCKSIZE;
|
||||
if (zhp != NULL) {
|
||||
maxbs = zpool_get_prop_int(zhp->zpool_hdl,
|
||||
ZPOOL_PROP_MAXBLOCKSIZE, NULL);
|
||||
}
|
||||
/*
|
||||
* Volumes are limited to a volblocksize of 128KB,
|
||||
* because they typically service workloads with
|
||||
* small random writes, which incur a large performance
|
||||
* penalty with large blocks.
|
||||
*/
|
||||
if (prop == ZFS_PROP_VOLBLOCKSIZE)
|
||||
maxbs = SPA_OLD_MAXBLOCKSIZE;
|
||||
/*
|
||||
* The value must be a power of two between
|
||||
* SPA_MINBLOCKSIZE and maxbs.
|
||||
*/
|
||||
if (intval < SPA_MINBLOCKSIZE ||
|
||||
intval > SPA_MAXBLOCKSIZE || !ISP2(intval)) {
|
||||
intval > maxbs || !ISP2(intval)) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"'%s' must be power of 2 from %u "
|
||||
"to %uk"), propname,
|
||||
(uint_t)SPA_MINBLOCKSIZE,
|
||||
(uint_t)SPA_MAXBLOCKSIZE >> 10);
|
||||
"'%s' must be power of 2 from 512B "
|
||||
"to %uKB"), propname, maxbs >> 10);
|
||||
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
|
||||
goto error;
|
||||
}
|
||||
break;
|
||||
|
||||
}
|
||||
case ZFS_PROP_MLSLABEL:
|
||||
{
|
||||
/*
|
||||
@ -1437,7 +1452,8 @@ zfs_setprop_error(libzfs_handle_t *hdl, zfs_prop_t prop, int err,
|
||||
break;
|
||||
|
||||
case ERANGE:
|
||||
if (prop == ZFS_PROP_COMPRESSION) {
|
||||
if (prop == ZFS_PROP_COMPRESSION ||
|
||||
prop == ZFS_PROP_RECORDSIZE) {
|
||||
(void) zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"property setting is not allowed on "
|
||||
"bootable datasets"));
|
||||
@ -3136,9 +3152,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
|
||||
case EDOM:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"volume block size must be power of 2 from "
|
||||
"%u to %uk"),
|
||||
(uint_t)SPA_MINBLOCKSIZE,
|
||||
(uint_t)SPA_MAXBLOCKSIZE >> 10);
|
||||
"512B to 128KB"));
|
||||
|
||||
return (zfs_error(hdl, EZFS_BADPROP, errbuf));
|
||||
|
||||
|
@ -206,7 +206,7 @@ static void *
|
||||
cksummer(void *arg)
|
||||
{
|
||||
dedup_arg_t *dda = arg;
|
||||
char *buf = malloc(1<<20);
|
||||
char *buf = zfs_alloc(dda->dedup_hdl, SPA_MAXBLOCKSIZE);
|
||||
dmu_replay_record_t thedrr;
|
||||
dmu_replay_record_t *drr = &thedrr;
|
||||
struct drr_begin *drrb = &thedrr.drr_u.drr_begin;
|
||||
@ -271,9 +271,9 @@ cksummer(void *arg)
|
||||
DMU_COMPOUNDSTREAM && drr->drr_payloadlen != 0) {
|
||||
int sz = drr->drr_payloadlen;
|
||||
|
||||
if (sz > 1<<20) {
|
||||
free(buf);
|
||||
buf = malloc(sz);
|
||||
if (sz > SPA_MAXBLOCKSIZE) {
|
||||
buf = zfs_realloc(dda->dedup_hdl, buf,
|
||||
SPA_MAXBLOCKSIZE, sz);
|
||||
}
|
||||
(void) ssread(buf, sz, ofp);
|
||||
if (ferror(stdin))
|
||||
@ -806,7 +806,7 @@ typedef struct send_dump_data {
|
||||
char prevsnap[ZFS_MAXNAMELEN];
|
||||
uint64_t prevsnap_obj;
|
||||
boolean_t seenfrom, seento, replicate, doall, fromorigin;
|
||||
boolean_t verbose, dryrun, parsable, progress, embed_data;
|
||||
boolean_t verbose, dryrun, parsable, progress, embed_data, large_block;
|
||||
int outfd;
|
||||
boolean_t err;
|
||||
nvlist_t *fss;
|
||||
@ -1153,6 +1153,8 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
|
||||
}
|
||||
|
||||
enum lzc_send_flags flags = 0;
|
||||
if (sdd->large_block)
|
||||
flags |= LZC_SEND_FLAG_LARGE_BLOCK;
|
||||
if (sdd->embed_data)
|
||||
flags |= LZC_SEND_FLAG_EMBED_DATA;
|
||||
|
||||
@ -1501,6 +1503,7 @@ zfs_send(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
sdd.parsable = flags->parsable;
|
||||
sdd.progress = flags->progress;
|
||||
sdd.dryrun = flags->dryrun;
|
||||
sdd.large_block = flags->largeblock;
|
||||
sdd.embed_data = flags->embed_data;
|
||||
sdd.filter_cb = filter_func;
|
||||
sdd.filter_cb_arg = cb_arg;
|
||||
@ -2506,7 +2509,7 @@ static int
|
||||
recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
|
||||
{
|
||||
dmu_replay_record_t *drr;
|
||||
void *buf = malloc(1<<20);
|
||||
void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
|
||||
char errbuf[1024];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
|
@ -455,6 +455,10 @@ lzc_get_holds(const char *snapname, nvlist_t **holdsp)
|
||||
*
|
||||
* "fd" is the file descriptor to write the send stream to.
|
||||
*
|
||||
* If "flags" contains LZC_SEND_FLAG_LARGE_BLOCK, the stream is permitted
|
||||
* to contain DRR_WRITE records with drr_length > 128K, and DRR_OBJECT
|
||||
* records with drr_blksz > 128K.
|
||||
*
|
||||
* If "flags" contains LZC_SEND_FLAG_EMBED_DATA, the stream is permitted
|
||||
* to contain DRR_WRITE_EMBEDDED records with drr_etype==BP_EMBEDDED_TYPE_DATA,
|
||||
* which the receiving system must support (as indicated by support
|
||||
@ -471,6 +475,8 @@ lzc_send(const char *snapname, const char *from, int fd,
|
||||
fnvlist_add_int32(args, "fd", fd);
|
||||
if (from != NULL)
|
||||
fnvlist_add_string(args, "fromsnap", from);
|
||||
if (flags & LZC_SEND_FLAG_LARGE_BLOCK)
|
||||
fnvlist_add_boolean(args, "largeblockok");
|
||||
if (flags & LZC_SEND_FLAG_EMBED_DATA)
|
||||
fnvlist_add_boolean(args, "embedok");
|
||||
err = lzc_ioctl(ZFS_IOC_SEND_NEW, snapname, args, NULL);
|
||||
|
@ -53,7 +53,8 @@ int lzc_release(nvlist_t *, nvlist_t **);
|
||||
int lzc_get_holds(const char *, nvlist_t **);
|
||||
|
||||
enum lzc_send_flags {
|
||||
LZC_SEND_FLAG_EMBED_DATA = 1 << 0
|
||||
LZC_SEND_FLAG_EMBED_DATA = 1 << 0,
|
||||
LZC_SEND_FLAG_LARGE_BLOCK = 1 << 1
|
||||
};
|
||||
|
||||
int lzc_send(const char *, const char *, int, enum lzc_send_flags);
|
||||
|
@ -176,12 +176,12 @@ zfs \- configures ZFS file systems
|
||||
|
||||
.LP
|
||||
.nf
|
||||
\fBzfs\fR \fBsend\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
\fBzfs\fR \fBsend\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
.fi
|
||||
|
||||
.LP
|
||||
.nf
|
||||
\fBzfs\fR \fBsend\fR [\fB-e\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
\fBzfs\fR \fBsend\fR [\fB-eL\fR] [\fB-i \fIsnapshot\fR|\fIbookmark\fR]\fR \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
.fi
|
||||
|
||||
.LP
|
||||
@ -1244,7 +1244,9 @@ significant performance gains. Use of this property for general purpose file
|
||||
systems is strongly discouraged, and may adversely affect performance.
|
||||
.sp
|
||||
The size specified must be a power of two greater than or equal to 512 and less
|
||||
than or equal to 128 Kbytes.
|
||||
than or equal to 128 Kbytes. If the \fBlarge_blocks\fR feature is enabled
|
||||
on the pool, the size may be up to 1 Mbyte. See \fBzpool-features\fR(5)
|
||||
for details on ZFS feature flags.
|
||||
.sp
|
||||
Changing the file system's \fBrecordsize\fR affects only files created
|
||||
afterward; existing files are unaffected.
|
||||
@ -2923,7 +2925,7 @@ See \fBzpool-features\fR(5) for details on ZFS feature flags and the
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs send\fR [\fB-DnPpRve\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
\fBzfs send\fR [\fB-DnPpRveL\fR] [\fB-\fR[\fBiI\fR] \fIsnapshot\fR] \fIsnapshot\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
@ -2996,6 +2998,21 @@ will be much better if the filesystem uses a dedup-capable checksum (eg.
|
||||
\fBsha256\fR).
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fB-L\fR\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
Generate a stream which may contain blocks larger than 128KB. This flag
|
||||
has no effect if the \fBlarge_blocks\fR pool feature is disabled, or if
|
||||
the \fBrecordsize\fR property of this filesystem has never been set above
|
||||
128KB. The receiving system must have the \fBlarge_blocks\fR pool feature
|
||||
enabled as well. See \fBzpool-features\fR(5) for details on ZFS feature
|
||||
flags and the \fBlarge_blocks\fR feature.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
@ -3064,7 +3081,7 @@ on future versions of \fBZFS\fR.
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fBzfs send\fR [\fB-e\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
\fBzfs send\fR [\fB-eL\fR] [\fB-i\fR \fIsnapshot\fR|\fIbookmark\fR] \fIfilesystem\fR|\fIvolume\fR|\fIsnapshot\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
@ -3092,6 +3109,21 @@ be the origin snapshot, or an earlier snapshot in the origin's filesystem,
|
||||
or the origin's origin, etc.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fB-L\fR\fR
|
||||
.ad
|
||||
.sp .6
|
||||
.RS 4n
|
||||
Generate a stream which may contain blocks larger than 128KB. This flag
|
||||
has no effect if the \fBlarge_blocks\fR pool feature is disabled, or if
|
||||
the \fBrecordsize\fR property of this filesystem has never been set above
|
||||
128KB. The receiving system must have the \fBlarge_blocks\fR pool feature
|
||||
enabled as well. See \fBzpool-features\fR(5) for details on ZFS feature
|
||||
flags and the \fBlarge_blocks\fR feature.
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
|
@ -18,7 +18,6 @@
|
||||
.SH NAME
|
||||
zpool\-features \- ZFS pool feature descriptions
|
||||
.SH DESCRIPTION
|
||||
.sp
|
||||
.LP
|
||||
ZFS pool on\-disk format versions are specified via "features" which replace
|
||||
the old on\-disk format numbers (the last supported on\-disk format number is
|
||||
@ -36,7 +35,6 @@ format of the pool is specified by the set of all features marked as
|
||||
\fBactive\fR on the pool. If the pool was created by another software version
|
||||
this set may include unsupported features.
|
||||
.SS "Identifying features"
|
||||
.sp
|
||||
.LP
|
||||
Every feature has a guid of the form \fIcom.example:feature_name\fR. The reverse
|
||||
DNS name ensures that the feature's guid is unique across all ZFS
|
||||
@ -51,7 +49,6 @@ name is the portion of its guid which follows the ':' (e.g.
|
||||
however a feature's short name may differ across ZFS implementations if
|
||||
following the convention would result in name conflicts.
|
||||
.SS "Feature states"
|
||||
.sp
|
||||
.LP
|
||||
Features can be in one of three states:
|
||||
.sp
|
||||
@ -97,7 +94,6 @@ cannot be disabled once they have been enabled.
|
||||
The state of supported features is exposed through pool properties of the form
|
||||
\fIfeature@short_name\fR.
|
||||
.SS "Read\-only compatibility"
|
||||
.sp
|
||||
.LP
|
||||
Some features may make on\-disk format changes that do not interfere with other
|
||||
software's ability to read from the pool. These features are referred to as
|
||||
@ -106,7 +102,6 @@ compatible, the pool can be imported in read\-only mode by setting the
|
||||
\fBreadonly\fR property during import (see \fBzpool\fR(1M) for details on
|
||||
importing pools).
|
||||
.SS "Unsupported features"
|
||||
.sp
|
||||
.LP
|
||||
For each unsupported feature enabled on an imported pool a pool property
|
||||
named \fIunsupported@feature_guid\fR will indicate why the import was allowed
|
||||
@ -133,13 +128,11 @@ read\-only mode.
|
||||
.RE
|
||||
|
||||
.SS "Feature dependencies"
|
||||
.sp
|
||||
.LP
|
||||
Some features depend on other features being enabled in order to function
|
||||
properly. Enabling a feature will automatically enable any features it
|
||||
depends on.
|
||||
.SH FEATURES
|
||||
.sp
|
||||
.LP
|
||||
The following features are supported on this system:
|
||||
.sp
|
||||
@ -430,5 +423,26 @@ never return to being \fBenabled\fR.
|
||||
|
||||
.RE
|
||||
|
||||
.sp
|
||||
.ne 2
|
||||
.na
|
||||
\fB\fBlarge_blocks\fR\fR
|
||||
.ad
|
||||
.RS 4n
|
||||
.TS
|
||||
l l .
|
||||
GUID org.open-zfs:large_blocks
|
||||
READ\-ONLY COMPATIBLE no
|
||||
DEPENDENCIES extensible_dataset
|
||||
.TE
|
||||
|
||||
The \fBlarge_blocks\fR feature allows the record size on a dataset to be
|
||||
set larger than 128KB.
|
||||
|
||||
This feature becomes \fBactive\fR once a \fBrecordsize\fR property has been
|
||||
set larger than 128KB, and will return to being \fBenabled\fR once all
|
||||
filesystems that have ever had their recordsize larger than 128KB are destroyed.
|
||||
.RE
|
||||
|
||||
.SH "SEE ALSO"
|
||||
\fBzpool\fR(1M)
|
||||
|
@ -43,7 +43,7 @@ bpobj_alloc_empty(objset_t *os, int blocksize, dmu_tx_t *tx)
|
||||
if (!spa_feature_is_active(spa, SPA_FEATURE_EMPTY_BPOBJ)) {
|
||||
ASSERT0(dp->dp_empty_bpobj);
|
||||
dp->dp_empty_bpobj =
|
||||
bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx);
|
||||
bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx);
|
||||
VERIFY(zap_add(os,
|
||||
DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_EMPTY_BPOBJ, sizeof (uint64_t), 1,
|
||||
@ -396,7 +396,8 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
|
||||
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
|
||||
if (bpo->bpo_phys->bpo_subobjs == 0) {
|
||||
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
|
||||
DMU_OT_BPOBJ_SUBOBJ, SPA_MAXBLOCKSIZE, DMU_OT_NONE, 0, tx);
|
||||
DMU_OT_BPOBJ_SUBOBJ, SPA_OLD_MAXBLOCKSIZE,
|
||||
DMU_OT_NONE, 0, tx);
|
||||
}
|
||||
|
||||
dmu_object_info_t doi;
|
||||
|
@ -65,7 +65,7 @@ bptree_alloc(objset_t *os, dmu_tx_t *tx)
|
||||
bptree_phys_t *bt;
|
||||
|
||||
obj = dmu_object_alloc(os, DMU_OTN_UINT64_METADATA,
|
||||
SPA_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
|
||||
SPA_OLD_MAXBLOCKSIZE, DMU_OTN_UINT64_METADATA,
|
||||
sizeof (bptree_phys_t), tx);
|
||||
|
||||
/*
|
||||
|
@ -2022,10 +2022,8 @@ dbuf_spill_set_blksz(dmu_buf_t *db_fake, uint64_t blksz, dmu_tx_t *tx)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
if (blksz == 0)
|
||||
blksz = SPA_MINBLOCKSIZE;
|
||||
if (blksz > SPA_MAXBLOCKSIZE)
|
||||
blksz = SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE);
|
||||
ASSERT3U(blksz, <=, spa_maxblocksize(dmu_objset_spa(db->db_objset)));
|
||||
blksz = P2ROUNDUP(blksz, SPA_MINBLOCKSIZE);
|
||||
|
||||
DB_DNODE_ENTER(db);
|
||||
dn = DB_DNODE(db);
|
||||
|
@ -255,6 +255,14 @@ logbias_changed_cb(void *arg, uint64_t newval)
|
||||
zil_set_logbias(os->os_zil, newval);
|
||||
}
|
||||
|
||||
/*
 * recordsize_changed_cb - property-change callback for "recordsize".
 *
 * "arg" is treated as the objset_t the callback was registered with;
 * "newval" is stored into that objset's os_recordsize field.
 */
static void
|
||||
recordsize_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
objset_t *os = arg;
|
||||
|
||||
os->os_recordsize = newval;
|
||||
}
|
||||
|
||||
void
|
||||
dmu_objset_byteswap(void *buf, size_t size)
|
||||
{
|
||||
@ -384,6 +392,11 @@ dmu_objset_open_impl(spa_t *spa, dsl_dataset_t *ds, blkptr_t *bp,
|
||||
ZFS_PROP_REDUNDANT_METADATA),
|
||||
redundant_metadata_changed_cb, os);
|
||||
}
|
||||
if (err == 0) {
|
||||
err = dsl_prop_register(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
|
||||
recordsize_changed_cb, os);
|
||||
}
|
||||
}
|
||||
if (err != 0) {
|
||||
VERIFY(arc_buf_remove_ref(os->os_phys_buf,
|
||||
@ -642,6 +655,9 @@ dmu_objset_evict(objset_t *os)
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_REDUNDANT_METADATA),
|
||||
redundant_metadata_changed_cb, os));
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
|
||||
recordsize_changed_cb, os));
|
||||
}
|
||||
VERIFY0(dsl_prop_unregister(ds,
|
||||
zfs_prop_to_name(ZFS_PROP_PRIMARYCACHE),
|
||||
|
@ -206,11 +206,12 @@ dump_write(dmu_sendarg_t *dsp, dmu_object_type_t type,
|
||||
drrw->drr_offset = offset;
|
||||
drrw->drr_length = blksz;
|
||||
drrw->drr_toguid = dsp->dsa_toguid;
|
||||
if (BP_IS_EMBEDDED(bp)) {
|
||||
if (bp == NULL || BP_IS_EMBEDDED(bp)) {
|
||||
/*
|
||||
* There's no pre-computed checksum of embedded BP's, so
|
||||
* (like fletcher4-checkummed blocks) userland will have
|
||||
* to compute a dedup-capable checksum itself.
|
||||
* There's no pre-computed checksum for partial-block
|
||||
* writes or embedded BP's, so (like
|
||||
* fletcher4-checksummed blocks) userland will have to
|
||||
* compute a dedup-capable checksum itself.
|
||||
*/
|
||||
drrw->drr_checksumtype = ZIO_CHECKSUM_OFF;
|
||||
} else {
|
||||
@ -372,6 +373,10 @@ dump_dnode(dmu_sendarg_t *dsp, uint64_t object, dnode_phys_t *dnp)
|
||||
drro->drr_compress = dnp->dn_compress;
|
||||
drro->drr_toguid = dsp->dsa_toguid;
|
||||
|
||||
if (!(dsp->dsa_featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
|
||||
drro->drr_blksz > SPA_OLD_MAXBLOCKSIZE)
|
||||
drro->drr_blksz = SPA_OLD_MAXBLOCKSIZE;
|
||||
|
||||
if (dump_bytes(dsp, dsp->dsa_drr, sizeof (dmu_replay_record_t)) != 0)
|
||||
return (SET_ERROR(EINTR));
|
||||
|
||||
@ -491,6 +496,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
uint32_t aflags = ARC_WAIT;
|
||||
arc_buf_t *abuf;
|
||||
int blksz = BP_GET_LSIZE(bp);
|
||||
uint64_t offset;
|
||||
|
||||
ASSERT3U(blksz, ==, dnp->dn_datablkszsec << SPA_MINBLOCKSHIFT);
|
||||
ASSERT0(zb->zb_level);
|
||||
@ -511,8 +517,24 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
}
|
||||
}
|
||||
|
||||
err = dump_write(dsp, type, zb->zb_object, zb->zb_blkid * blksz,
|
||||
blksz, bp, abuf->b_data);
|
||||
offset = zb->zb_blkid * blksz;
|
||||
|
||||
if (!(dsp->dsa_featureflags &
|
||||
DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
|
||||
blksz > SPA_OLD_MAXBLOCKSIZE) {
|
||||
char *buf = abuf->b_data;
|
||||
while (blksz > 0 && err == 0) {
|
||||
int n = MIN(blksz, SPA_OLD_MAXBLOCKSIZE);
|
||||
err = dump_write(dsp, type, zb->zb_object,
|
||||
offset, n, NULL, buf);
|
||||
offset += n;
|
||||
buf += n;
|
||||
blksz -= n;
|
||||
}
|
||||
} else {
|
||||
err = dump_write(dsp, type, zb->zb_object,
|
||||
offset, blksz, bp, abuf->b_data);
|
||||
}
|
||||
(void) arc_buf_remove_ref(abuf, &abuf);
|
||||
}
|
||||
|
||||
@ -526,7 +548,7 @@ backup_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
|
||||
static int
|
||||
dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
|
||||
zfs_bookmark_phys_t *fromzb, boolean_t is_clone, boolean_t embedok,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
boolean_t large_block_ok, int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
objset_t *os;
|
||||
dmu_replay_record_t *drr;
|
||||
@ -561,6 +583,8 @@ dmu_send_impl(void *tag, dsl_pool_t *dp, dsl_dataset_t *ds,
|
||||
}
|
||||
#endif
|
||||
|
||||
if (large_block_ok && ds->ds_large_blocks)
|
||||
featureflags |= DMU_BACKUP_FEATURE_LARGE_BLOCKS;
|
||||
if (embedok &&
|
||||
spa_feature_is_active(dp->dp_spa, SPA_FEATURE_EMBEDDED_DATA)) {
|
||||
featureflags |= DMU_BACKUP_FEATURE_EMBED_DATA;
|
||||
@ -656,7 +680,8 @@ out:
|
||||
|
||||
int
|
||||
dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
boolean_t embedok, int outfd, vnode_t *vp, offset_t *off)
|
||||
boolean_t embedok, boolean_t large_block_ok,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
dsl_pool_t *dp;
|
||||
dsl_dataset_t *ds;
|
||||
@ -690,18 +715,19 @@ dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
zb.zbm_guid = fromds->ds_phys->ds_guid;
|
||||
is_clone = (fromds->ds_dir != ds->ds_dir);
|
||||
dsl_dataset_rele(fromds, FTAG);
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok,
|
||||
outfd, vp, off);
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
|
||||
embedok, large_block_ok, outfd, vp, off);
|
||||
} else {
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok,
|
||||
outfd, vp, off);
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
|
||||
embedok, large_block_ok, outfd, vp, off);
|
||||
}
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
return (err);
|
||||
}
|
||||
|
||||
int
|
||||
dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
||||
dmu_send(const char *tosnap, const char *fromsnap,
|
||||
boolean_t embedok, boolean_t large_block_ok,
|
||||
int outfd, vnode_t *vp, offset_t *off)
|
||||
{
|
||||
dsl_pool_t *dp;
|
||||
@ -768,11 +794,11 @@ dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
||||
dsl_pool_rele(dp, FTAG);
|
||||
return (err);
|
||||
}
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone, embedok,
|
||||
outfd, vp, off);
|
||||
err = dmu_send_impl(FTAG, dp, ds, &zb, is_clone,
|
||||
embedok, large_block_ok, outfd, vp, off);
|
||||
} else {
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE, embedok,
|
||||
outfd, vp, off);
|
||||
err = dmu_send_impl(FTAG, dp, ds, NULL, B_FALSE,
|
||||
embedok, large_block_ok, outfd, vp, off);
|
||||
}
|
||||
if (owned)
|
||||
dsl_dataset_disown(ds, FTAG);
|
||||
@ -972,6 +998,15 @@ dmu_recv_begin_check(void *arg, dmu_tx_t *tx)
|
||||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LZ4_COMPRESS))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
/*
|
||||
* The receiving code doesn't know how to translate large blocks
|
||||
* to smaller ones, so the pool must have the LARGE_BLOCKS
|
||||
* feature enabled if the stream has LARGE_BLOCKS.
|
||||
*/
|
||||
if ((featureflags & DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
|
||||
!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
error = dsl_dataset_hold(dp, tofs, FTAG, &ds);
|
||||
if (error == 0) {
|
||||
/* target fs already exists; recv into temp clone */
|
||||
@ -1097,6 +1132,13 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
|
||||
}
|
||||
VERIFY0(dsl_dataset_own_obj(dp, dsobj, dmu_recv_tag, &newds));
|
||||
|
||||
if ((DMU_GET_FEATUREFLAGS(drrb->drr_versioninfo) &
|
||||
DMU_BACKUP_FEATURE_LARGE_BLOCKS) &&
|
||||
!newds->ds_large_blocks) {
|
||||
dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
|
||||
newds->ds_large_blocks = B_TRUE;
|
||||
}
|
||||
|
||||
dmu_buf_will_dirty(newds->ds_dbuf, tx);
|
||||
newds->ds_phys->ds_flags |= DS_FLAG_INCONSISTENT;
|
||||
|
||||
@ -1222,6 +1264,7 @@ restore_read(struct restorearg *ra, int len, char *buf)
|
||||
|
||||
/* some things will require 8-byte alignment, so everything must */
|
||||
ASSERT0(len % 8);
|
||||
ASSERT3U(len, <=, ra->bufsize);
|
||||
|
||||
while (done < len) {
|
||||
ssize_t resid;
|
||||
@ -1361,7 +1404,7 @@ restore_object(struct restorearg *ra, objset_t *os, struct drr_object *drro)
|
||||
drro->drr_compress >= ZIO_COMPRESS_FUNCTIONS ||
|
||||
P2PHASE(drro->drr_blksz, SPA_MINBLOCKSIZE) ||
|
||||
drro->drr_blksz < SPA_MINBLOCKSIZE ||
|
||||
drro->drr_blksz > SPA_MAXBLOCKSIZE ||
|
||||
drro->drr_blksz > spa_maxblocksize(dmu_objset_spa(os)) ||
|
||||
drro->drr_bonuslen > DN_MAX_BONUSLEN) {
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
@ -1634,7 +1677,7 @@ restore_spill(struct restorearg *ra, objset_t *os, struct drr_spill *drrs)
|
||||
int err;
|
||||
|
||||
if (drrs->drr_length < SPA_MINBLOCKSIZE ||
|
||||
drrs->drr_length > SPA_MAXBLOCKSIZE)
|
||||
drrs->drr_length > spa_maxblocksize(dmu_objset_spa(os)))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
data = restore_read(ra, drrs->drr_length, NULL);
|
||||
@ -1721,7 +1764,7 @@ dmu_recv_stream(dmu_recv_cookie_t *drc, vnode_t *vp, offset_t *voffp,
|
||||
ra.cksum = drc->drc_cksum;
|
||||
ra.vp = vp;
|
||||
ra.voff = *voffp;
|
||||
ra.bufsize = 1<<20;
|
||||
ra.bufsize = SPA_MAXBLOCKSIZE;
|
||||
ra.buf = kmem_alloc(ra.bufsize, KM_SLEEP);
|
||||
|
||||
/* these were verified in dmu_recv_begin */
|
||||
|
@ -224,7 +224,7 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
return;
|
||||
|
||||
min_bs = SPA_MINBLOCKSHIFT;
|
||||
max_bs = SPA_MAXBLOCKSHIFT;
|
||||
max_bs = highbit64(txh->txh_tx->tx_objset->os_recordsize) - 1;
|
||||
min_ibs = DN_MIN_INDBLKSHIFT;
|
||||
max_ibs = DN_MAX_INDBLKSHIFT;
|
||||
|
||||
@ -293,6 +293,14 @@ dmu_tx_count_write(dmu_tx_hold_t *txh, uint64_t off, uint64_t len)
|
||||
*/
|
||||
ASSERT(dn->dn_datablkshift != 0);
|
||||
min_bs = max_bs = dn->dn_datablkshift;
|
||||
} else {
|
||||
/*
|
||||
* The blocksize can increase up to the recordsize,
|
||||
* or if it is already more than the recordsize,
|
||||
* up to the next power of 2.
|
||||
*/
|
||||
min_bs = highbit64(dn->dn_datablksz - 1);
|
||||
max_bs = MAX(max_bs, highbit64(dn->dn_datablksz - 1));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -750,11 +758,11 @@ dmu_tx_hold_zap(dmu_tx_t *tx, uint64_t object, int add, const char *name)
|
||||
bp = &dn->dn_phys->dn_blkptr[0];
|
||||
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
bp, bp->blk_birth))
|
||||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tooverwrite += MZAP_MAX_BLKSZ;
|
||||
else
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_towrite += MZAP_MAX_BLKSZ;
|
||||
if (!BP_IS_HOLE(bp))
|
||||
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tounref += MZAP_MAX_BLKSZ;
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1543,18 +1551,18 @@ dmu_tx_hold_spill(dmu_tx_t *tx, uint64_t object)
|
||||
|
||||
/* If blkptr doesn't exist then add space to towrite */
|
||||
if (!(dn->dn_phys->dn_flags & DNODE_FLAG_SPILL_BLKPTR)) {
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE;
|
||||
} else {
|
||||
blkptr_t *bp;
|
||||
|
||||
bp = &dn->dn_phys->dn_spill;
|
||||
if (dsl_dataset_block_freeable(dn->dn_objset->os_dsl_dataset,
|
||||
bp, bp->blk_birth))
|
||||
txh->txh_space_tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tooverwrite += SPA_OLD_MAXBLOCKSIZE;
|
||||
else
|
||||
txh->txh_space_towrite += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_towrite += SPA_OLD_MAXBLOCKSIZE;
|
||||
if (!BP_IS_HOLE(bp))
|
||||
txh->txh_space_tounref += SPA_MAXBLOCKSIZE;
|
||||
txh->txh_space_tounref += SPA_OLD_MAXBLOCKSIZE;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -510,10 +510,10 @@ dnode_allocate(dnode_t *dn, dmu_object_type_t ot, int blocksize, int ibs,
|
||||
{
|
||||
int i;
|
||||
|
||||
ASSERT3U(blocksize, <=,
|
||||
spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
|
||||
if (blocksize == 0)
|
||||
blocksize = 1 << zfs_default_bs;
|
||||
else if (blocksize > SPA_MAXBLOCKSIZE)
|
||||
blocksize = SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
blocksize = P2ROUNDUP(blocksize, SPA_MINBLOCKSIZE);
|
||||
|
||||
@ -594,7 +594,8 @@ dnode_reallocate(dnode_t *dn, dmu_object_type_t ot, int blocksize,
|
||||
int nblkptr;
|
||||
|
||||
ASSERT3U(blocksize, >=, SPA_MINBLOCKSIZE);
|
||||
ASSERT3U(blocksize, <=, SPA_MAXBLOCKSIZE);
|
||||
ASSERT3U(blocksize, <=,
|
||||
spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
|
||||
ASSERT0(blocksize % SPA_MINBLOCKSIZE);
|
||||
ASSERT(dn->dn_object != DMU_META_DNODE_OBJECT || dmu_tx_private_ok(tx));
|
||||
ASSERT(tx->tx_txg != 0);
|
||||
@ -1347,10 +1348,9 @@ dnode_set_blksz(dnode_t *dn, uint64_t size, int ibs, dmu_tx_t *tx)
|
||||
dmu_buf_impl_t *db;
|
||||
int err;
|
||||
|
||||
ASSERT3U(size, <=, spa_maxblocksize(dmu_objset_spa(dn->dn_objset)));
|
||||
if (size == 0)
|
||||
size = SPA_MINBLOCKSIZE;
|
||||
if (size > SPA_MAXBLOCKSIZE)
|
||||
size = SPA_MAXBLOCKSIZE;
|
||||
else
|
||||
size = P2ROUNDUP(size, SPA_MINBLOCKSIZE);
|
||||
|
||||
|
@ -50,6 +50,17 @@
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/dsl_bookmark.h>
|
||||
|
||||
/*
|
||||
* The SPA supports block sizes up to 16MB. However, very large blocks
|
||||
* can have an impact on i/o latency (e.g. tying up a spinning disk for
|
||||
* ~300ms), and also potentially on the memory allocator. Therefore,
|
||||
* we do not allow the recordsize to be set larger than zfs_max_recordsize
|
||||
* (default 1MB). Larger blocks can be created by changing this tunable,
|
||||
* and pools with larger blocks can always be imported and used, regardless
|
||||
* of this setting.
|
||||
*/
|
||||
int zfs_max_recordsize = 1 * 1024 * 1024;
|
||||
|
||||
#define SWITCH64(x, y) \
|
||||
{ \
|
||||
uint64_t __tmp = (x); \
|
||||
@ -59,8 +70,6 @@
|
||||
|
||||
#define DS_REF_MAX (1ULL << 62)
|
||||
|
||||
#define DSL_DEADLIST_BLOCKSIZE SPA_MAXBLOCKSIZE
|
||||
|
||||
/*
|
||||
* Figure out how much of this delta should be propagated to the dsl_dir
|
||||
* layer. If there's a refreservation, that space has already been
|
||||
@ -110,6 +119,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
|
||||
ds->ds_phys->ds_compressed_bytes += compressed;
|
||||
ds->ds_phys->ds_uncompressed_bytes += uncompressed;
|
||||
ds->ds_phys->ds_unique_bytes += used;
|
||||
if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
|
||||
ds->ds_need_large_blocks = B_TRUE;
|
||||
mutex_exit(&ds->ds_lock);
|
||||
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD, delta,
|
||||
compressed, uncompressed, tx);
|
||||
@ -387,6 +398,14 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
list_create(&ds->ds_sendstreams, sizeof (dmu_sendarg_t),
|
||||
offsetof(dmu_sendarg_t, dsa_link));
|
||||
|
||||
if (doi.doi_type == DMU_OTN_ZAP_METADATA) {
|
||||
err = zap_contains(mos, dsobj, DS_FIELD_LARGE_BLOCKS);
|
||||
if (err == 0)
|
||||
ds->ds_large_blocks = B_TRUE;
|
||||
else
|
||||
ASSERT3U(err, ==, ENOENT);
|
||||
}
|
||||
|
||||
if (err == 0) {
|
||||
err = dsl_dir_hold_obj(dp,
|
||||
ds->ds_phys->ds_dir_obj, NULL, ds, &ds->ds_dir);
|
||||
@ -700,6 +719,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
dsphys->ds_flags |= origin->ds_phys->ds_flags &
|
||||
(DS_FLAG_INCONSISTENT | DS_FLAG_CI_DATASET);
|
||||
|
||||
if (origin->ds_large_blocks)
|
||||
dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
|
||||
|
||||
dmu_buf_will_dirty(origin->ds_dbuf, tx);
|
||||
origin->ds_phys->ds_num_children++;
|
||||
|
||||
@ -1213,6 +1235,9 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
|
||||
dsphys->ds_bp = ds->ds_phys->ds_bp;
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
|
||||
if (ds->ds_large_blocks)
|
||||
dsl_dataset_activate_large_blocks_sync_impl(dsobj, tx);
|
||||
|
||||
ASSERT3U(ds->ds_prev != 0, ==, ds->ds_phys->ds_prev_snap_obj != 0);
|
||||
if (ds->ds_prev) {
|
||||
uint64_t next_clones_obj =
|
||||
@ -1486,6 +1511,11 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
|
||||
ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;
|
||||
|
||||
dmu_objset_sync(ds->ds_objset, zio, tx);
|
||||
|
||||
if (ds->ds_need_large_blocks && !ds->ds_large_blocks) {
|
||||
dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx);
|
||||
ds->ds_large_blocks = B_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
@ -3128,6 +3158,77 @@ dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap,
|
||||
return (err);
|
||||
}
|
||||
|
||||
/*
 * Check callback handed to dsl_sync_task() by
 * dsl_dataset_activate_large_blocks().  'arg' is the dataset name.
 *
 * Returns ENOTSUP if SPA_FEATURE_LARGE_BLOCKS is not enabled on the pool,
 * the error from dsl_dataset_hold() if the dataset cannot be held,
 * EALREADY if the dataset already has ds_large_blocks set, and 0 otherwise.
 */
static int
|
||||
dsl_dataset_activate_large_blocks_check(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
const char *dsname = arg;
|
||||
dsl_dataset_t *ds;
|
||||
dsl_pool_t *dp = dmu_tx_pool(tx);
|
||||
int error = 0;
|
||||
|
||||
/* The pool-wide feature must be enabled before any dataset can use it. */
if (!spa_feature_is_enabled(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
|
||||
/*
 * NOTE(review): asserts that extensible_dataset is enabled whenever
 * large_blocks is; presumably a feature dependency -- confirm.
 */
ASSERT(spa_feature_is_enabled(dp->dp_spa,
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET));
|
||||
|
||||
error = dsl_dataset_hold(dp, dsname, FTAG, &ds);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
|
||||
/* Already activated: reported as EALREADY, which the caller maps to 0. */
if (ds->ds_large_blocks)
|
||||
error = EALREADY;
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
 * Record on disk that dataset object 'dsobj' may contain large blocks:
 * bump the SPA_FEATURE_LARGE_BLOCKS count via spa_feature_incr() and add
 * a DS_FIELD_LARGE_BLOCKS entry (a single uint64_t with value 0) to the
 * dataset object in the pool's meta objset.
 *
 * This does not touch the in-core ds_large_blocks flag; callers that hold
 * a dsl_dataset_t set it themselves.
 */
void
|
||||
dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx)
|
||||
{
|
||||
spa_t *spa = dmu_tx_pool(tx)->dp_spa;
|
||||
objset_t *mos = dmu_tx_pool(tx)->dp_meta_objset;
|
||||
uint64_t zero = 0;
|
||||
|
||||
spa_feature_incr(spa, SPA_FEATURE_LARGE_BLOCKS, tx);
|
||||
/*
 * NOTE(review): presumably makes the dataset object usable as a ZAP
 * so the zap_add() below can succeed -- confirm against
 * dmu_object_zapify().
 */
dmu_object_zapify(mos, dsobj, DMU_OT_DSL_DATASET, tx);
|
||||
|
||||
/* VERIFY0: a failure to add the entry is treated as fatal. */
VERIFY0(zap_add(mos, dsobj, DS_FIELD_LARGE_BLOCKS,
|
||||
sizeof (zero), 1, &zero, tx));
|
||||
}
|
||||
|
||||
/*
 * Sync callback handed to dsl_sync_task() by
 * dsl_dataset_activate_large_blocks().  'arg' is the dataset name.
 *
 * Holds the dataset, activates large blocks for its object via
 * dsl_dataset_activate_large_blocks_sync_impl(), sets the in-core
 * ds_large_blocks flag, and releases the hold.
 */
static void
|
||||
dsl_dataset_activate_large_blocks_sync(void *arg, dmu_tx_t *tx)
|
||||
{
|
||||
const char *dsname = arg;
|
||||
dsl_dataset_t *ds;
|
||||
|
||||
/* The hold is VERIFY'd rather than error-checked; this function returns void. */
VERIFY0(dsl_dataset_hold(dmu_tx_pool(tx), dsname, FTAG, &ds));
|
||||
|
||||
dsl_dataset_activate_large_blocks_sync_impl(ds->ds_object, tx);
|
||||
/* The check callback returns EALREADY when the flag is already set. */
ASSERT(!ds->ds_large_blocks);
|
||||
ds->ds_large_blocks = B_TRUE;
|
||||
dsl_dataset_rele(ds, FTAG);
|
||||
}
|
||||
|
||||
/*
 * Activate large-block support on the dataset named 'dsname' by running
 * the _check/_sync callback pair through dsl_sync_task().
 *
 * Returns 0 on success or if the dataset already supports large blocks;
 * otherwise returns the error from dsl_sync_task() (for example the
 * ENOTSUP produced by the check callback when the pool feature is not
 * enabled).
 */
int
|
||||
dsl_dataset_activate_large_blocks(const char *dsname)
|
||||
{
|
||||
int error;
|
||||
|
||||
/* The dataset name is passed through as the callbacks' 'arg'. */
error = dsl_sync_task(dsname,
|
||||
dsl_dataset_activate_large_blocks_check,
|
||||
dsl_dataset_activate_large_blocks_sync, (void *)dsname,
|
||||
1, ZFS_SPACE_CHECK_RESERVED);
|
||||
|
||||
/*
|
||||
* EALREADY indicates that this dataset already supports large blocks.
|
||||
*/
|
||||
if (error == EALREADY)
|
||||
error = 0;
|
||||
return (error);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return TRUE if 'earlier' is an earlier snapshot in 'later's timeline.
|
||||
* For example, they could both be snapshots of the same filesystem, and
|
||||
|
@ -143,7 +143,7 @@ uint64_t
|
||||
dsl_deadlist_alloc(objset_t *os, dmu_tx_t *tx)
|
||||
{
|
||||
if (spa_version(dmu_objset_spa(os)) < SPA_VERSION_DEADLISTS)
|
||||
return (bpobj_alloc(os, SPA_MAXBLOCKSIZE, tx));
|
||||
return (bpobj_alloc(os, SPA_OLD_MAXBLOCKSIZE, tx));
|
||||
return (zap_create(os, DMU_OT_DEADLIST, DMU_OT_DEADLIST_HDR,
|
||||
sizeof (dsl_deadlist_phys_t), tx));
|
||||
}
|
||||
@ -180,7 +180,7 @@ dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
|
||||
{
|
||||
if (dle->dle_bpobj.bpo_object ==
|
||||
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
|
||||
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
|
||||
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
|
||||
bpobj_close(&dle->dle_bpobj);
|
||||
bpobj_decr_empty(dl->dl_os, tx);
|
||||
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
|
||||
@ -254,7 +254,7 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
|
||||
|
||||
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
|
||||
dle->dle_mintxg = mintxg;
|
||||
obj = bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
|
||||
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
|
||||
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
|
||||
avl_add(&dl->dl_tree, dle);
|
||||
|
||||
@ -338,7 +338,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
|
||||
if (dle->dle_mintxg >= maxtxg)
|
||||
break;
|
||||
|
||||
obj = bpobj_alloc_empty(dl->dl_os, SPA_MAXBLOCKSIZE, tx);
|
||||
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
|
||||
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
|
||||
dle->dle_mintxg, obj, tx));
|
||||
}
|
||||
|
@ -264,6 +264,10 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
|
||||
|
||||
obj = ds->ds_object;
|
||||
|
||||
if (ds->ds_large_blocks) {
|
||||
ASSERT0(zap_contains(mos, obj, DS_FIELD_LARGE_BLOCKS));
|
||||
spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
|
||||
}
|
||||
if (ds->ds_phys->ds_prev_snap_obj != 0) {
|
||||
ASSERT3P(ds->ds_prev, ==, NULL);
|
||||
VERIFY0(dsl_dataset_hold_obj(dp,
|
||||
@ -720,6 +724,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
ASSERT0(ds->ds_reserved);
|
||||
}
|
||||
|
||||
if (ds->ds_large_blocks)
|
||||
spa_feature_decr(dp->dp_spa, SPA_FEATURE_LARGE_BLOCKS, tx);
|
||||
|
||||
dsl_scan_ds_destroyed(ds, tx);
|
||||
|
||||
obj = ds->ds_object;
|
||||
|
@ -367,7 +367,7 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
|
||||
FREE_DIR_NAME, &dp->dp_free_dir));
|
||||
|
||||
/* create and open the free_bplist */
|
||||
obj = bpobj_alloc(dp->dp_meta_objset, SPA_MAXBLOCKSIZE, tx);
|
||||
obj = bpobj_alloc(dp->dp_meta_objset, SPA_OLD_MAXBLOCKSIZE, tx);
|
||||
VERIFY(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx) == 0);
|
||||
VERIFY0(bpobj_open(&dp->dp_free_bpobj,
|
||||
@ -792,7 +792,7 @@ dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx)
|
||||
* subobj support. So call dmu_object_alloc() directly.
|
||||
*/
|
||||
obj = dmu_object_alloc(dp->dp_meta_objset, DMU_OT_BPOBJ,
|
||||
SPA_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
|
||||
SPA_OLD_MAXBLOCKSIZE, DMU_OT_BPOBJ_HDR, sizeof (bpobj_phys_t), tx);
|
||||
VERIFY0(zap_add(dp->dp_meta_objset, DMU_POOL_DIRECTORY_OBJECT,
|
||||
DMU_POOL_FREE_BPOBJ, sizeof (uint64_t), 1, &obj, tx));
|
||||
VERIFY0(bpobj_open(&dp->dp_free_bpobj, dp->dp_meta_objset, obj));
|
||||
|
@ -125,7 +125,7 @@ int metaslab_debug_unload = 0;
|
||||
* an allocation of this size then it switches to using more
|
||||
* aggressive strategy (i.e search by size rather than offset).
|
||||
*/
|
||||
uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
|
||||
uint64_t metaslab_df_alloc_threshold = SPA_OLD_MAXBLOCKSIZE;
|
||||
|
||||
/*
|
||||
* The minimum free space, in percent, which must be available
|
||||
|
@ -500,7 +500,7 @@ sa_resize_spill(sa_handle_t *hdl, uint32_t size, dmu_tx_t *tx)
|
||||
|
||||
if (size == 0) {
|
||||
blocksize = SPA_MINBLOCKSIZE;
|
||||
} else if (size > SPA_MAXBLOCKSIZE) {
|
||||
} else if (size > SPA_OLD_MAXBLOCKSIZE) {
|
||||
ASSERT(0);
|
||||
return (SET_ERROR(EFBIG));
|
||||
} else {
|
||||
@ -675,7 +675,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
|
||||
hdrsize = sa_find_sizes(sa, attr_desc, attr_count, hdl->sa_bonus,
|
||||
SA_BONUS, &i, &used, &spilling);
|
||||
|
||||
if (used > SPA_MAXBLOCKSIZE)
|
||||
if (used > SPA_OLD_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(EFBIG));
|
||||
|
||||
VERIFY(0 == dmu_set_bonus(hdl->sa_bonus, spilling ?
|
||||
@ -699,7 +699,7 @@ sa_build_layouts(sa_handle_t *hdl, sa_bulk_attr_t *attr_desc, int attr_count,
|
||||
attr_count - i, hdl->sa_spill, SA_SPILL, &i,
|
||||
&spill_used, &dummy);
|
||||
|
||||
if (spill_used > SPA_MAXBLOCKSIZE)
|
||||
if (spill_used > SPA_OLD_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(EFBIG));
|
||||
|
||||
buf_space = hdl->sa_spill->db_size - spillhdrsize;
|
||||
|
@ -267,6 +267,14 @@ spa_prop_get_config(spa_t *spa, nvlist_t **nvp)
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_ALTROOT, spa->spa_root,
|
||||
0, ZPROP_SRC_LOCAL);
|
||||
|
||||
if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS)) {
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
|
||||
MIN(zfs_max_recordsize, SPA_MAXBLOCKSIZE), ZPROP_SRC_NONE);
|
||||
} else {
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_MAXBLOCKSIZE, NULL,
|
||||
SPA_OLD_MAXBLOCKSIZE, ZPROP_SRC_NONE);
|
||||
}
|
||||
|
||||
if ((dp = list_head(&spa->spa_config_list)) != NULL) {
|
||||
if (dp->scd_path == NULL) {
|
||||
spa_prop_add_list(*nvp, ZPOOL_PROP_CACHEFILE,
|
||||
@ -481,7 +489,7 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
||||
|
||||
if (!error) {
|
||||
objset_t *os;
|
||||
uint64_t compress;
|
||||
uint64_t propval;
|
||||
|
||||
if (strval == NULL || strval[0] == '\0') {
|
||||
objnum = zpool_prop_default_numeric(
|
||||
@ -492,15 +500,25 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
||||
if (error = dmu_objset_hold(strval, FTAG, &os))
|
||||
break;
|
||||
|
||||
/* Must be ZPL and not gzip compressed. */
|
||||
/*
|
||||
* Must be ZPL, and its property settings
|
||||
* must be supported by GRUB (compression
|
||||
* is not gzip, and large blocks are not used).
|
||||
*/
|
||||
|
||||
if (dmu_objset_type(os) != DMU_OST_ZFS) {
|
||||
error = SET_ERROR(ENOTSUP);
|
||||
} else if ((error =
|
||||
dsl_prop_get_int_ds(dmu_objset_ds(os),
|
||||
zfs_prop_to_name(ZFS_PROP_COMPRESSION),
|
||||
&compress)) == 0 &&
|
||||
!BOOTFS_COMPRESS_VALID(compress)) {
|
||||
&propval)) == 0 &&
|
||||
!BOOTFS_COMPRESS_VALID(propval)) {
|
||||
error = SET_ERROR(ENOTSUP);
|
||||
} else if ((error =
|
||||
dsl_prop_get_int_ds(dmu_objset_ds(os),
|
||||
zfs_prop_to_name(ZFS_PROP_RECORDSIZE),
|
||||
&propval)) == 0 &&
|
||||
propval > SPA_OLD_MAXBLOCKSIZE) {
|
||||
error = SET_ERROR(ENOTSUP);
|
||||
} else {
|
||||
objnum = dmu_objset_id(os);
|
||||
|
@ -90,7 +90,7 @@ spa_history_create_obj(spa_t *spa, dmu_tx_t *tx)
|
||||
|
||||
ASSERT(spa->spa_history == 0);
|
||||
spa->spa_history = dmu_object_alloc(mos, DMU_OT_SPA_HISTORY,
|
||||
SPA_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS,
|
||||
SPA_OLD_MAXBLOCKSIZE, DMU_OT_SPA_HISTORY_OFFSETS,
|
||||
sizeof (spa_history_phys_t), tx);
|
||||
|
||||
VERIFY(zap_add(mos, DMU_POOL_DIRECTORY_OBJECT,
|
||||
|
@ -1963,3 +1963,12 @@ spa_debug_enabled(spa_t *spa)
|
||||
{
|
||||
return (spa->spa_debug);
|
||||
}
|
||||
|
||||
/*
 * Return the largest block size usable on this pool: SPA_MAXBLOCKSIZE
 * when SPA_FEATURE_LARGE_BLOCKS is enabled, otherwise the pre-feature
 * limit SPA_OLD_MAXBLOCKSIZE.
 */
int
|
||||
spa_maxblocksize(spa_t *spa)
|
||||
{
|
||||
if (spa_feature_is_enabled(spa, SPA_FEATURE_LARGE_BLOCKS))
|
||||
return (SPA_MAXBLOCKSIZE);
|
||||
else
|
||||
return (SPA_OLD_MAXBLOCKSIZE);
|
||||
}
|
||||
|
@ -249,7 +249,7 @@ void zfs_znode_byteswap(void *buf, size_t size);
|
||||
* The maximum number of bytes that can be accessed as part of one
|
||||
* operation, including metadata.
|
||||
*/
|
||||
#define DMU_MAX_ACCESS (10<<20) /* 10MB */
|
||||
#define DMU_MAX_ACCESS (32 * 1024 * 1024) /* 32MB */
|
||||
#define DMU_MAX_DELETEBLKCNT (20480) /* ~5MB of indirect blocks */
|
||||
|
||||
#define DMU_USERUSED_OBJECT (-1ULL)
|
||||
@ -637,6 +637,7 @@ void xuio_stat_wbuf_copied();
|
||||
void xuio_stat_wbuf_nocopy();
|
||||
|
||||
extern int zfs_prefetch_disable;
|
||||
extern int zfs_max_recordsize;
|
||||
|
||||
/*
|
||||
* Asynchronously try to read in the data.
|
||||
|
@ -95,6 +95,7 @@ struct objset {
|
||||
zfs_cache_type_t os_secondary_cache;
|
||||
zfs_sync_type_t os_sync;
|
||||
zfs_redundant_metadata_type_t os_redundant_metadata;
|
||||
int os_recordsize;
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
|
@ -37,12 +37,14 @@ struct dsl_dataset;
|
||||
struct drr_begin;
|
||||
struct avl_tree;
|
||||
|
||||
int dmu_send(const char *tosnap, const char *fromsnap, boolean_t embedok,
|
||||
int dmu_send(const char *tosnap, const char *fromsnap,
|
||||
boolean_t embedok, boolean_t large_block_ok,
|
||||
int outfd, struct vnode *vp, offset_t *off);
|
||||
int dmu_send_estimate(struct dsl_dataset *ds, struct dsl_dataset *fromds,
|
||||
uint64_t *sizep);
|
||||
int dmu_send_obj(const char *pool, uint64_t tosnap, uint64_t fromsnap,
|
||||
boolean_t embedok, int outfd, vnode_t *vp, offset_t *off);
|
||||
boolean_t embedok, boolean_t large_block_ok,
|
||||
int outfd, struct vnode *vp, offset_t *off);
|
||||
|
||||
typedef struct dmu_recv_cookie {
|
||||
struct dsl_dataset *drc_ds;
|
||||
|
@ -82,6 +82,13 @@ struct dsl_pool;
|
||||
*/
|
||||
#define DS_FIELD_BOOKMARK_NAMES "com.delphix:bookmarks"
|
||||
|
||||
/*
|
||||
* This field is present (with value=0) if this dataset may contain large
|
||||
* blocks (>128KB). If it is present, then this dataset
|
||||
* is counted in the refcount of the SPA_FEATURE_LARGE_BLOCKS feature.
|
||||
*/
|
||||
#define DS_FIELD_LARGE_BLOCKS "org.open-zfs:large_blocks"
|
||||
|
||||
/*
|
||||
* DS_FLAG_CI_DATASET is set if the dataset contains a file system whose
|
||||
* name lookups should be performed case-insensitively.
|
||||
@ -135,6 +142,8 @@ typedef struct dsl_dataset {
|
||||
/* only used in syncing context, only valid for non-snapshots: */
|
||||
struct dsl_dataset *ds_prev;
|
||||
uint64_t ds_bookmarks; /* DMU_OTN_ZAP_METADATA */
|
||||
boolean_t ds_large_blocks;
|
||||
boolean_t ds_need_large_blocks;
|
||||
|
||||
/* has internal locking: */
|
||||
dsl_deadlist_t ds_deadlist;
|
||||
@ -244,6 +253,8 @@ int dsl_dataset_space_written(dsl_dataset_t *oldsnap, dsl_dataset_t *new,
|
||||
int dsl_dataset_space_wouldfree(dsl_dataset_t *firstsnap, dsl_dataset_t *last,
|
||||
uint64_t *usedp, uint64_t *compp, uint64_t *uncompp);
|
||||
boolean_t dsl_dataset_is_dirty(dsl_dataset_t *ds);
|
||||
int dsl_dataset_activate_large_blocks(const char *dsname);
|
||||
void dsl_dataset_activate_large_blocks_sync_impl(uint64_t dsobj, dmu_tx_t *tx);
|
||||
|
||||
int dsl_dsobj_to_dsname(char *pname, uint64_t obj, char *buf);
|
||||
|
||||
|
@ -94,17 +94,26 @@ _NOTE(CONSTCOND) } while (0)
|
||||
_NOTE(CONSTCOND) } while (0)
|
||||
|
||||
/*
|
||||
* We currently support nine block sizes, from 512 bytes to 128K.
|
||||
* We could go higher, but the benefits are near-zero and the cost
|
||||
* of COWing a giant block to modify one byte would become excessive.
|
||||
* We currently support block sizes from 512 bytes to 16MB.
|
||||
* The benefits of larger blocks, and thus larger IO, need to be weighed
|
||||
* against the cost of COWing a giant block to modify one byte, and the
|
||||
* large latency of reading or writing a large block.
|
||||
*
|
||||
* Note that although blocks up to 16MB are supported, the recordsize
|
||||
* property can not be set larger than zfs_max_recordsize (default 1MB).
|
||||
* See the comment near zfs_max_recordsize in dsl_dataset.c for details.
|
||||
*
|
||||
* Note that although the LSIZE field of the blkptr_t can store sizes up
|
||||
* to 32MB, the dnode's dn_datablkszsec can only store sizes up to
|
||||
* 32MB - 512 bytes. Therefore, we limit SPA_MAXBLOCKSIZE to 16MB.
|
||||
*/
|
||||
#define SPA_MINBLOCKSHIFT 9
|
||||
#define SPA_MAXBLOCKSHIFT 17
|
||||
#define SPA_OLD_MAXBLOCKSHIFT 17
|
||||
#define SPA_MAXBLOCKSHIFT 24
|
||||
#define SPA_MINBLOCKSIZE (1ULL << SPA_MINBLOCKSHIFT)
|
||||
#define SPA_OLD_MAXBLOCKSIZE (1ULL << SPA_OLD_MAXBLOCKSHIFT)
|
||||
#define SPA_MAXBLOCKSIZE (1ULL << SPA_MAXBLOCKSHIFT)
|
||||
|
||||
#define SPA_BLOCKSIZES (SPA_MAXBLOCKSHIFT - SPA_MINBLOCKSHIFT + 1)
|
||||
|
||||
/*
|
||||
* Size of block to hold the configuration data (a packed nvlist)
|
||||
*/
|
||||
@ -781,6 +790,7 @@ extern boolean_t spa_has_slogs(spa_t *spa);
|
||||
extern boolean_t spa_is_root(spa_t *spa);
|
||||
extern boolean_t spa_writeable(spa_t *spa);
|
||||
extern boolean_t spa_has_pending_synctask(spa_t *spa);
|
||||
extern int spa_maxblocksize(spa_t *spa);
|
||||
|
||||
extern int spa_mode(spa_t *spa);
|
||||
extern uint64_t strtonum(const char *str, char **nptr);
|
||||
|
@ -42,8 +42,7 @@ extern int fzap_default_block_shift;
|
||||
|
||||
#define MZAP_ENT_LEN 64
|
||||
#define MZAP_NAME_LEN (MZAP_ENT_LEN - 8 - 4 - 2)
|
||||
#define MZAP_MAX_BLKSHIFT SPA_MAXBLOCKSHIFT
|
||||
#define MZAP_MAX_BLKSZ (1 << MZAP_MAX_BLKSHIFT)
|
||||
#define MZAP_MAX_BLKSZ SPA_OLD_MAXBLOCKSIZE
|
||||
|
||||
#define ZAP_NEED_CD (-1U)
|
||||
|
||||
|
@ -85,13 +85,16 @@ typedef enum drr_headertype {
|
||||
/* flags #3 - #15 are reserved for incompatible closed-source implementations */
|
||||
#define DMU_BACKUP_FEATURE_EMBED_DATA (1<<16)
|
||||
#define DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 (1<<17)
|
||||
/* flag #18 is reserved for a Delphix feature */
|
||||
#define DMU_BACKUP_FEATURE_LARGE_BLOCKS (1<<19)
|
||||
|
||||
/*
|
||||
* Mask of all supported backup features
|
||||
*/
|
||||
#define DMU_BACKUP_FEATURE_MASK (DMU_BACKUP_FEATURE_DEDUP | \
|
||||
DMU_BACKUP_FEATURE_DEDUPPROPS | DMU_BACKUP_FEATURE_SA_SPILL | \
|
||||
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4)
|
||||
DMU_BACKUP_FEATURE_EMBED_DATA | DMU_BACKUP_FEATURE_EMBED_DATA_LZ4 | \
|
||||
DMU_BACKUP_FEATURE_LARGE_BLOCKS)
|
||||
|
||||
/* Are all features in the given flag word currently supported? */
|
||||
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
|
||||
|
@ -136,8 +136,6 @@ extern "C" {
|
||||
#define ZFS_SHARES_DIR "SHARES"
|
||||
#define ZFS_SA_ATTRS "SA_ATTRS"
|
||||
|
||||
#define ZFS_MAX_BLOCKSIZE (SPA_MAXBLOCKSIZE)
|
||||
|
||||
/*
|
||||
* Path component length
|
||||
*
|
||||
|
@ -90,7 +90,6 @@ typedef struct zil_chain {
|
||||
} zil_chain_t;
|
||||
|
||||
#define ZIL_MIN_BLKSZ 4096ULL
|
||||
#define ZIL_MAX_BLKSZ SPA_MAXBLOCKSIZE
|
||||
|
||||
/*
|
||||
* The words of a log block checksum.
|
||||
|
@ -139,7 +139,7 @@ typedef struct zil_bp_node {
|
||||
avl_node_t zn_node;
|
||||
} zil_bp_node_t;
|
||||
|
||||
#define ZIL_MAX_LOG_DATA (SPA_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
|
||||
#define ZIL_MAX_LOG_DATA (SPA_OLD_MAXBLOCKSIZE - sizeof (zil_chain_t) - \
|
||||
sizeof (lr_write_t))
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -828,9 +828,9 @@ vdev_metaslab_init(vdev_t *vd, uint64_t txg)
|
||||
|
||||
/*
|
||||
* Compute the raidz-deflation ratio. Note, we hard-code
|
||||
* in 128k (1 << 17) because it is the current "typical" blocksize.
|
||||
* Even if SPA_MAXBLOCKSIZE changes, this algorithm must never change,
|
||||
* or we will inconsistently account for existing bp's.
|
||||
* in 128k (1 << 17) because it is the "typical" blocksize.
|
||||
* Even though SPA_MAXBLOCKSIZE changed, this algorithm can not change,
|
||||
* otherwise it would inconsistently account for existing bp's.
|
||||
*/
|
||||
vd->vdev_deflate_ratio = (1 << 17) /
|
||||
(vdev_psize_to_asize(vd, 1 << 17) >> SPA_MINBLOCKSHIFT);
|
||||
|
@ -162,7 +162,7 @@ int zfs_vdev_async_write_active_max_dirty_percent = 60;
|
||||
* we include spans of optional I/Os to aid aggregation at the disk even when
|
||||
* they aren't able to help us aggregate at this level.
|
||||
*/
|
||||
int zfs_vdev_aggregation_limit = SPA_MAXBLOCKSIZE;
|
||||
int zfs_vdev_aggregation_limit = SPA_OLD_MAXBLOCKSIZE;
|
||||
int zfs_vdev_read_gap_limit = 32 << 10;
|
||||
int zfs_vdev_write_gap_limit = 4 << 10;
|
||||
|
||||
|
@ -1604,7 +1604,7 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size,
|
||||
/*
|
||||
* Don't write past the end of the block
|
||||
*/
|
||||
VERIFY3U(offset + size, <=, origoffset + SPA_MAXBLOCKSIZE);
|
||||
VERIFY3U(offset + size, <=, origoffset + SPA_OLD_MAXBLOCKSIZE);
|
||||
|
||||
start = offset;
|
||||
end = start + size;
|
||||
@ -1619,8 +1619,8 @@ vdev_raidz_physio(vdev_t *vd, caddr_t data, size_t size,
|
||||
* KB size.
|
||||
*/
|
||||
rm = vdev_raidz_map_alloc(data - (offset - origoffset),
|
||||
SPA_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift, vd->vdev_children,
|
||||
vd->vdev_nparity);
|
||||
SPA_OLD_MAXBLOCKSIZE, origoffset, tvd->vdev_ashift,
|
||||
vd->vdev_children, vd->vdev_nparity);
|
||||
|
||||
coloffset = origoffset;
|
||||
|
||||
|
@ -33,6 +33,7 @@
|
||||
#include <sys/zap_leaf.h>
|
||||
#include <sys/avl.h>
|
||||
#include <sys/arc.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/sunddi.h>
|
||||
@ -653,9 +654,9 @@ zap_create_flags(objset_t *os, int normflags, zap_flags_t flags,
|
||||
uint64_t obj = dmu_object_alloc(os, ot, 0, bonustype, bonuslen, tx);
|
||||
|
||||
ASSERT(leaf_blockshift >= SPA_MINBLOCKSHIFT &&
|
||||
leaf_blockshift <= SPA_MAXBLOCKSHIFT &&
|
||||
leaf_blockshift <= SPA_OLD_MAXBLOCKSHIFT &&
|
||||
indirect_blockshift >= SPA_MINBLOCKSHIFT &&
|
||||
indirect_blockshift <= SPA_MAXBLOCKSHIFT);
|
||||
indirect_blockshift <= SPA_OLD_MAXBLOCKSHIFT);
|
||||
|
||||
VERIFY(dmu_object_set_blocksize(os, obj,
|
||||
1ULL << leaf_blockshift, indirect_blockshift, tx) == 0);
|
||||
@ -1345,7 +1346,6 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
zap_t *zap;
|
||||
int err = 0;
|
||||
|
||||
|
||||
/*
|
||||
* Since we don't have a name, we cannot figure out which blocks will
|
||||
* be affected in this operation. So, account for the worst case :
|
||||
@ -1358,7 +1358,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
* large microzap results in a promotion to fatzap.
|
||||
*/
|
||||
if (name == NULL) {
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE;
|
||||
return (err);
|
||||
}
|
||||
|
||||
@ -1382,7 +1382,7 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
/*
|
||||
* We treat this case as similar to (name == NULL)
|
||||
*/
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_MAXBLOCKSIZE;
|
||||
*towrite += (3 + (add ? 4 : 0)) * SPA_OLD_MAXBLOCKSIZE;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
@ -1401,12 +1401,12 @@ zap_count_write(objset_t *os, uint64_t zapobj, const char *name, int add,
|
||||
* ptrtbl blocks
|
||||
*/
|
||||
if (dmu_buf_freeable(zap->zap_dbuf))
|
||||
*tooverwrite += SPA_MAXBLOCKSIZE;
|
||||
*tooverwrite += MZAP_MAX_BLKSZ;
|
||||
else
|
||||
*towrite += SPA_MAXBLOCKSIZE;
|
||||
*towrite += MZAP_MAX_BLKSZ;
|
||||
|
||||
if (add) {
|
||||
*towrite += 4 * SPA_MAXBLOCKSIZE;
|
||||
*towrite += 4 * MZAP_MAX_BLKSZ;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2383,7 +2383,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
|
||||
const char *propname = nvpair_name(pair);
|
||||
zfs_prop_t prop = zfs_name_to_prop(propname);
|
||||
uint64_t intval;
|
||||
int err;
|
||||
int err = -1;
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (zfs_prop_userquota(propname))
|
||||
@ -3772,8 +3772,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
* the SPA supports it. We ignore any errors here since
|
||||
* we'll catch them later.
|
||||
*/
|
||||
if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
|
||||
nvpair_value_uint64(pair, &intval) == 0) {
|
||||
if (nvpair_value_uint64(pair, &intval) == 0) {
|
||||
if (intval >= ZIO_COMPRESS_GZIP_1 &&
|
||||
intval <= ZIO_COMPRESS_GZIP_9 &&
|
||||
zfs_earlier_version(dsname,
|
||||
@ -3824,6 +3823,42 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
break;
|
||||
|
||||
case ZFS_PROP_RECORDSIZE:
|
||||
/* Record sizes above 128k need the feature to be enabled */
|
||||
if (nvpair_value_uint64(pair, &intval) == 0 &&
|
||||
intval > SPA_OLD_MAXBLOCKSIZE) {
|
||||
spa_t *spa;
|
||||
|
||||
/*
|
||||
* If this is a bootable dataset then
|
||||
* we don't allow large (>128K) blocks,
|
||||
* because GRUB doesn't support them.
|
||||
*/
|
||||
if (zfs_is_bootfs(dsname) &&
|
||||
intval > SPA_OLD_MAXBLOCKSIZE) {
|
||||
return (SET_ERROR(EDOM));
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't allow setting the property above 1MB,
|
||||
* unless the tunable has been changed.
|
||||
*/
|
||||
if (intval > zfs_max_recordsize ||
|
||||
intval > SPA_MAXBLOCKSIZE)
|
||||
return (SET_ERROR(EDOM));
|
||||
|
||||
if ((err = spa_open(dsname, &spa, FTAG)) != 0)
|
||||
return (err);
|
||||
|
||||
if (!spa_feature_is_enabled(spa,
|
||||
SPA_FEATURE_LARGE_BLOCKS)) {
|
||||
spa_close(spa, FTAG);
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
}
|
||||
spa_close(spa, FTAG);
|
||||
}
|
||||
break;
|
||||
|
||||
case ZFS_PROP_SHARESMB:
|
||||
if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
|
||||
return (SET_ERROR(ENOTSUP));
|
||||
@ -4245,7 +4280,7 @@ out:
|
||||
* zc_fromobj objsetid of incremental fromsnap (may be zero)
|
||||
* zc_guid if set, estimate size of stream only. zc_cookie is ignored.
|
||||
* output size in zc_objset_type.
|
||||
* zc_flags if =1, WRITE_EMBEDDED records are permitted
|
||||
* zc_flags lzc_send_flags
|
||||
*
|
||||
* outputs:
|
||||
* zc_objset_type estimated size, if zc_guid is set
|
||||
@ -4257,6 +4292,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
offset_t off;
|
||||
boolean_t estimate = (zc->zc_guid != 0);
|
||||
boolean_t embedok = (zc->zc_flags & 0x1);
|
||||
boolean_t large_block_ok = (zc->zc_flags & 0x2);
|
||||
|
||||
if (zc->zc_obj != 0) {
|
||||
dsl_pool_t *dp;
|
||||
@ -4317,7 +4353,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
|
||||
zc->zc_fromobj, embedok, zc->zc_cookie, fp->f_vnode, &off);
|
||||
zc->zc_fromobj, embedok, large_block_ok,
|
||||
zc->zc_cookie, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
@ -5219,6 +5256,8 @@ zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
* innvl: {
|
||||
* "fd" -> file descriptor to write stream to (int32)
|
||||
* (optional) "fromsnap" -> full snap name to send an incremental from
|
||||
* (optional) "largeblockok" -> (value ignored)
|
||||
* presence indicates that blocks > 128KB are permitted
|
||||
* (optional) "embedok" -> (value ignored)
|
||||
* presence indicates DRR_WRITE_EMBEDDED records are permitted
|
||||
* }
|
||||
@ -5233,6 +5272,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
offset_t off;
|
||||
char *fromname = NULL;
|
||||
int fd;
|
||||
boolean_t largeblockok;
|
||||
boolean_t embedok;
|
||||
|
||||
error = nvlist_lookup_int32(innvl, "fd", &fd);
|
||||
@ -5241,6 +5281,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
|
||||
(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
|
||||
|
||||
largeblockok = nvlist_exists(innvl, "largeblockok");
|
||||
embedok = nvlist_exists(innvl, "embedok");
|
||||
|
||||
file_t *fp = getf(fd);
|
||||
@ -5248,7 +5289,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
return (SET_ERROR(EBADF));
|
||||
|
||||
off = fp->f_offset;
|
||||
error = dmu_send(snapname, fromname, embedok, fd, fp->f_vnode, &off);
|
||||
error = dmu_send(snapname, fromname, embedok, largeblockok,
|
||||
fd, fp->f_vnode, &off);
|
||||
|
||||
if (VOP_SEEK(fp->f_vnode, fp->f_offset, &off, NULL) == 0)
|
||||
fp->f_offset = off;
|
||||
|
@ -485,7 +485,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
|
||||
* If the write would overflow the largest block then split it.
|
||||
*/
|
||||
if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
|
||||
len = SPA_MAXBLOCKSIZE >> 1;
|
||||
len = SPA_OLD_MAXBLOCKSIZE >> 1;
|
||||
else
|
||||
len = resid;
|
||||
|
||||
|
@ -272,10 +272,9 @@ static void
|
||||
blksz_changed_cb(void *arg, uint64_t newval)
|
||||
{
|
||||
zfsvfs_t *zfsvfs = arg;
|
||||
|
||||
if (newval < SPA_MINBLOCKSIZE ||
|
||||
newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
|
||||
newval = SPA_MAXBLOCKSIZE;
|
||||
ASSERT3U(newval, <=, spa_maxblocksize(dmu_objset_spa(zfsvfs->z_os)));
|
||||
ASSERT3U(newval, >=, SPA_MINBLOCKSIZE);
|
||||
ASSERT(ISP2(newval));
|
||||
|
||||
zfsvfs->z_max_blksz = newval;
|
||||
zfsvfs->z_vfs->vfs_bsize = newval;
|
||||
@ -906,7 +905,7 @@ zfsvfs_create(const char *osname, zfsvfs_t **zfvp)
|
||||
*/
|
||||
zfsvfs->z_vfs = NULL;
|
||||
zfsvfs->z_parent = zfsvfs;
|
||||
zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
|
||||
zfsvfs->z_max_blksz = SPA_OLD_MAXBLOCKSIZE;
|
||||
zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
|
||||
zfsvfs->z_os = os;
|
||||
|
||||
|
@ -817,8 +817,14 @@ zfs_write(vnode_t *vp, uio_t *uio, int ioflag, cred_t *cr, caller_context_t *ct)
|
||||
uint64_t new_blksz;
|
||||
|
||||
if (zp->z_blksz > max_blksz) {
|
||||
/*
|
||||
* File's blocksize is already larger than the
|
||||
* "recordsize" property. Only let it grow to
|
||||
* the next power of 2.
|
||||
*/
|
||||
ASSERT(!ISP2(zp->z_blksz));
|
||||
new_blksz = MIN(end_size, SPA_MAXBLOCKSIZE);
|
||||
new_blksz = MIN(end_size,
|
||||
1 << highbit64(zp->z_blksz));
|
||||
} else {
|
||||
new_blksz = MIN(end_size, max_blksz);
|
||||
}
|
||||
|
@ -58,6 +58,7 @@
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dmu_objset.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/zap.h>
|
||||
@ -1474,8 +1475,13 @@ zfs_extend(znode_t *zp, uint64_t end)
|
||||
* We are growing the file past the current block size.
|
||||
*/
|
||||
if (zp->z_blksz > zp->z_zfsvfs->z_max_blksz) {
|
||||
/*
|
||||
* File's blocksize is already larger than the
|
||||
* "recordsize" property. Only let it grow to
|
||||
* the next power of 2.
|
||||
*/
|
||||
ASSERT(!ISP2(zp->z_blksz));
|
||||
newblksz = MIN(end, SPA_MAXBLOCKSIZE);
|
||||
newblksz = MIN(end, 1 << highbit64(zp->z_blksz));
|
||||
} else {
|
||||
newblksz = MIN(end, zp->z_zfsvfs->z_max_blksz);
|
||||
}
|
||||
|
@ -220,6 +220,7 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
|
||||
sizeof (cksum)) || BP_IS_HOLE(&zilc->zc_next_blk)) {
|
||||
error = SET_ERROR(ECKSUM);
|
||||
} else {
|
||||
ASSERT3U(len, <=, SPA_OLD_MAXBLOCKSIZE);
|
||||
bcopy(lr, dst, len);
|
||||
*end = (char *)dst + len;
|
||||
*nbp = zilc->zc_next_blk;
|
||||
@ -234,6 +235,8 @@ zil_read_log_block(zilog_t *zilog, const blkptr_t *bp, blkptr_t *nbp, void *dst,
|
||||
(zilc->zc_nused > (size - sizeof (*zilc)))) {
|
||||
error = SET_ERROR(ECKSUM);
|
||||
} else {
|
||||
ASSERT3U(zilc->zc_nused, <=,
|
||||
SPA_OLD_MAXBLOCKSIZE);
|
||||
bcopy(lr, dst, zilc->zc_nused);
|
||||
*end = (char *)dst + zilc->zc_nused;
|
||||
*nbp = zilc->zc_next_blk;
|
||||
@ -317,7 +320,7 @@ zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
|
||||
* If the log has been claimed, stop if we encounter a sequence
|
||||
* number greater than the highest claimed sequence number.
|
||||
*/
|
||||
lrbuf = zio_buf_alloc(SPA_MAXBLOCKSIZE);
|
||||
lrbuf = zio_buf_alloc(SPA_OLD_MAXBLOCKSIZE);
|
||||
zil_bp_tree_init(zilog);
|
||||
|
||||
for (blk = zh->zh_log; !BP_IS_HOLE(&blk); blk = next_blk) {
|
||||
@ -364,7 +367,7 @@ done:
|
||||
(max_blk_seq == claim_blk_seq && max_lr_seq == claim_lr_seq));
|
||||
|
||||
zil_bp_tree_fini(zilog);
|
||||
zio_buf_free(lrbuf, SPA_MAXBLOCKSIZE);
|
||||
zio_buf_free(lrbuf, SPA_OLD_MAXBLOCKSIZE);
|
||||
|
||||
return (error);
|
||||
}
|
||||
@ -896,7 +899,7 @@ zil_lwb_write_init(zilog_t *zilog, lwb_t *lwb)
|
||||
*
|
||||
* These must be a multiple of 4KB. Note only the amount used (again
|
||||
* aligned to 4KB) actually gets written. However, we can't always just
|
||||
* allocate SPA_MAXBLOCKSIZE as the slog space could be exhausted.
|
||||
* allocate SPA_OLD_MAXBLOCKSIZE as the slog space could be exhausted.
|
||||
*/
|
||||
uint64_t zil_block_buckets[] = {
|
||||
4096, /* non TX_WRITE */
|
||||
@ -978,7 +981,7 @@ zil_lwb_write_start(zilog_t *zilog, lwb_t *lwb)
|
||||
continue;
|
||||
zil_blksz = zil_block_buckets[i];
|
||||
if (zil_blksz == UINT64_MAX)
|
||||
zil_blksz = SPA_MAXBLOCKSIZE;
|
||||
zil_blksz = SPA_OLD_MAXBLOCKSIZE;
|
||||
zilog->zl_prev_blks[zilog->zl_prev_rotor] = zil_blksz;
|
||||
for (i = 0; i < ZIL_PREV_BLKS; i++)
|
||||
zil_blksz = MAX(zil_blksz, zilog->zl_prev_blks[i]);
|
||||
|
@ -112,9 +112,8 @@ zio_init(void)
|
||||
|
||||
/*
|
||||
* For small buffers, we want a cache for each multiple of
|
||||
* SPA_MINBLOCKSIZE. For medium-size buffers, we want a cache
|
||||
* for each quarter-power of 2. For large buffers, we want
|
||||
* a cache for each multiple of PAGESIZE.
|
||||
* SPA_MINBLOCKSIZE. For larger buffers, we want a cache
|
||||
* for each quarter-power of 2.
|
||||
*/
|
||||
for (c = 0; c < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; c++) {
|
||||
size_t size = (c + 1) << SPA_MINBLOCKSHIFT;
|
||||
@ -137,10 +136,8 @@ zio_init(void)
|
||||
#endif
|
||||
if (size <= 4 * SPA_MINBLOCKSIZE) {
|
||||
align = SPA_MINBLOCKSIZE;
|
||||
} else if (IS_P2ALIGNED(size, PAGESIZE)) {
|
||||
align = PAGESIZE;
|
||||
} else if (IS_P2ALIGNED(size, p2 >> 2)) {
|
||||
align = p2 >> 2;
|
||||
align = MIN(p2 >> 2, PAGESIZE);
|
||||
}
|
||||
|
||||
if (align != 0) {
|
||||
|
@ -191,7 +191,7 @@ int
|
||||
zvol_check_volblocksize(uint64_t volblocksize)
|
||||
{
|
||||
if (volblocksize < SPA_MINBLOCKSIZE ||
|
||||
volblocksize > SPA_MAXBLOCKSIZE ||
|
||||
volblocksize > SPA_OLD_MAXBLOCKSIZE ||
|
||||
!ISP2(volblocksize))
|
||||
return (SET_ERROR(EDOM));
|
||||
|
||||
@ -692,7 +692,7 @@ zvol_prealloc(zvol_state_t *zv)
|
||||
|
||||
while (resid != 0) {
|
||||
int error;
|
||||
uint64_t bytes = MIN(resid, SPA_MAXBLOCKSIZE);
|
||||
uint64_t bytes = MIN(resid, SPA_OLD_MAXBLOCKSIZE);
|
||||
|
||||
tx = dmu_tx_create(os);
|
||||
dmu_tx_hold_write(tx, ZVOL_OBJ, off, bytes);
|
||||
@ -1661,7 +1661,8 @@ zvol_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *cr, int *rvalp)
|
||||
(void) strcpy(dki.dki_dname, "zvol");
|
||||
dki.dki_ctype = DKC_UNKNOWN;
|
||||
dki.dki_unit = getminor(dev);
|
||||
dki.dki_maxtransfer = 1 << (SPA_MAXBLOCKSHIFT - zv->zv_min_bs);
|
||||
dki.dki_maxtransfer =
|
||||
1 << (SPA_OLD_MAXBLOCKSHIFT - zv->zv_min_bs);
|
||||
mutex_exit(&zfsdev_state_lock);
|
||||
if (ddi_copyout(&dki, (void *)arg, sizeof (dki), flag))
|
||||
error = SET_ERROR(EFAULT);
|
||||
@ -1978,14 +1979,14 @@ zvol_dump_init(zvol_state_t *zv, boolean_t resize)
|
||||
zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), 8, 1,
|
||||
&vbs, tx);
|
||||
error = error ? error : dmu_object_set_blocksize(
|
||||
os, ZVOL_OBJ, SPA_MAXBLOCKSIZE, 0, tx);
|
||||
os, ZVOL_OBJ, SPA_OLD_MAXBLOCKSIZE, 0, tx);
|
||||
if (version >= SPA_VERSION_DEDUP) {
|
||||
error = error ? error : zap_update(os, ZVOL_ZAP_OBJ,
|
||||
zfs_prop_to_name(ZFS_PROP_DEDUP), 8, 1,
|
||||
&dedup, tx);
|
||||
}
|
||||
if (error == 0)
|
||||
zv->zv_volblocksize = SPA_MAXBLOCKSIZE;
|
||||
zv->zv_volblocksize = SPA_OLD_MAXBLOCKSIZE;
|
||||
}
|
||||
dmu_tx_commit(tx);
|
||||
|
||||
|
@ -192,6 +192,7 @@ typedef enum {
|
||||
ZPOOL_PROP_FREEING,
|
||||
ZPOOL_PROP_FRAGMENTATION,
|
||||
ZPOOL_PROP_LEAKED,
|
||||
ZPOOL_PROP_MAXBLOCKSIZE,
|
||||
ZPOOL_NUM_PROPS
|
||||
} zpool_prop_t;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user