zfs: merge openzfs/zfs@3b89d9518 (master) into main

Notable upstream pull request merges:
  #12022 Fix endianness issues with zstd
  #12319 Extend zpool-iostat to account for ZIO_PRIORITY_REBUILD
  #12458 Add hole punching support on FreeBSD version 1400032
  #12473 Initialize parity blocks before RAID-Z reconstruction benchmarking
  #12511 Make 'zpool labelclear -f' work on offlined disks
  #12514 FreeBSD: Don't remove SA xattr if not SA znode

Obtained from:	OpenZFS
OpenZFS commit:	3b89d9518d
Author:	Martin Matuska
Date:	2021-08-31 08:58:27 +02:00
Commit:	21b492ed51
31 changed files with 407 additions and 66 deletions


@ -140,7 +140,8 @@ Usage: vdev_id [-h]
-p number of phy's per switch port [default=$PHYS_PER_PORT]
-h show this summary
EOF
exit 0
exit 1
# exit with an error so a udev rule does not process the usage message
}
map_slot() {
@ -728,7 +729,7 @@ done
if [ ! -r "$CONFIG" ] ; then
echo "Error: Config file \"$CONFIG\" not found"
exit 0
exit 1
fi
if [ -z "$DEV" ] && [ -z "$ENCLOSURE_MODE" ] ; then


@ -2259,7 +2259,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:EMBEDDED",
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
zfs_get_hdrlevel(&zstd_hdr));
return;
}
@ -2283,7 +2284,8 @@ snprintf_zstd_header(spa_t *spa, char *blkbuf, size_t buflen,
(void) snprintf(blkbuf + strlen(blkbuf),
buflen - strlen(blkbuf),
" ZSTD:size=%u:version=%u:level=%u:NORMAL",
zstd_hdr.c_len, zstd_hdr.version, zstd_hdr.level);
zstd_hdr.c_len, zfs_get_hdrversion(&zstd_hdr),
zfs_get_hdrlevel(&zstd_hdr));
abd_return_buf_copy(pabd, buf, BP_GET_LSIZE(bp));
}


@ -211,7 +211,7 @@ enum iostat_type {
* of all the nvlists a flag requires. Also specifies the order in
* which data gets printed in zpool iostat.
*/
static const char *vsx_type_to_nvlist[IOS_COUNT][13] = {
static const char *vsx_type_to_nvlist[IOS_COUNT][15] = {
[IOS_L_HISTO] = {
ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_TOT_W_LAT_HISTO,
@ -223,6 +223,7 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][13] = {
ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO,
ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO,
NULL},
[IOS_LATENCY] = {
ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
@ -230,6 +231,7 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][13] = {
ZPOOL_CONFIG_VDEV_DISK_R_LAT_HISTO,
ZPOOL_CONFIG_VDEV_DISK_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO,
ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO,
NULL},
[IOS_QUEUES] = {
ZPOOL_CONFIG_VDEV_SYNC_R_ACTIVE_QUEUE,
@ -238,6 +240,7 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][13] = {
ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE,
NULL},
[IOS_RQ_HISTO] = {
ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,
@ -252,6 +255,8 @@ static const char *vsx_type_to_nvlist[IOS_COUNT][13] = {
ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO,
ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO,
ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO,
ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO,
ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO,
NULL},
};
@ -1214,6 +1219,26 @@ zpool_do_remove(int argc, char **argv)
return (ret);
}
/*
* Return 1 if a vdev is active (being used in a pool)
* Return 0 if a vdev is inactive (offlined or faulted, or not in an active pool)
*
* This is useful for checking if a disk in an active pool is offlined or
* faulted.
*/
static int
vdev_is_active(char *vdev_path)
{
int fd;
fd = open(vdev_path, O_EXCL);
if (fd < 0) {
return (1); /* can't open O_EXCL - disk is active */
}
close(fd);
return (0); /* disk is inactive in the pool */
}
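
The O_EXCL check above leans on Linux block-device semantics: since O_RDONLY is 0, open(path, O_EXCL) is an O_RDONLY|O_EXCL open, and Linux fails such an open with EBUSY while the kernel holds the device (e.g. it belongs to an imported pool). A minimal standalone sketch of the same probe (the device path is hypothetical):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	/*
	 * /dev/sdb is a hypothetical disk; O_EXCL without O_CREAT is
	 * the "is this block device claimed by the kernel?" probe.
	 */
	int fd = open("/dev/sdb", O_RDONLY | O_EXCL);

	if (fd < 0) {
		perror("open");	/* EBUSY => in use, e.g. by an active pool */
		return (1);
	}
	close(fd);
	printf("device is not claimed by the kernel\n");
	return (0);
}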
/*
* zpool labelclear [-f] <vdev>
*
@ -1323,9 +1348,23 @@ zpool_do_labelclear(int argc, char **argv)
case POOL_STATE_ACTIVE:
case POOL_STATE_SPARE:
case POOL_STATE_L2CACHE:
/*
* We allow the user to call 'zpool labelclear -f'
* on an offlined disk in an active pool. We can check if
* the disk is online by calling vdev_is_active().
*/
if (force && !vdev_is_active(vdev))
break;
(void) fprintf(stderr, gettext(
"%s is a member (%s) of pool \"%s\"\n"),
"%s is a member (%s) of pool \"%s\""),
vdev, zpool_pool_state_to_name(state), name);
if (force) {
(void) fprintf(stderr, gettext(
". Offline the disk first to clear its label."));
}
printf("\n");
ret = 1;
goto errout;
@ -3844,22 +3883,23 @@ typedef struct name_and_columns {
unsigned int columns; /* Center name to this number of columns */
} name_and_columns_t;
#define IOSTAT_MAX_LABELS 13 /* Max number of labels on one line */
#define IOSTAT_MAX_LABELS 15 /* Max number of labels on one line */
static const name_and_columns_t iostat_top_labels[][IOSTAT_MAX_LABELS] =
{
[IOS_DEFAULT] = {{"capacity", 2}, {"operations", 2}, {"bandwidth", 2},
{NULL}},
[IOS_LATENCY] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2},
{"asyncq_wait", 2}, {"scrub", 1}, {"trim", 1}, {NULL}},
{"asyncq_wait", 2}, {"scrub", 1}, {"trim", 1}, {"rebuild", 1},
{NULL}},
[IOS_QUEUES] = {{"syncq_read", 2}, {"syncq_write", 2},
{"asyncq_read", 2}, {"asyncq_write", 2}, {"scrubq_read", 2},
{"trimq_write", 2}, {NULL}},
{"trimq_write", 2}, {"rebuildq_write", 2}, {NULL}},
[IOS_L_HISTO] = {{"total_wait", 2}, {"disk_wait", 2}, {"syncq_wait", 2},
{"asyncq_wait", 2}, {NULL}},
[IOS_RQ_HISTO] = {{"sync_read", 2}, {"sync_write", 2},
{"async_read", 2}, {"async_write", 2}, {"scrub", 2},
{"trim", 2}, {NULL}},
{"trim", 2}, {"rebuild", 2}, {NULL}},
};
/* Shorthand - if "columns" field not set, default to 1 column */
@ -3868,14 +3908,17 @@ static const name_and_columns_t iostat_bottom_labels[][IOSTAT_MAX_LABELS] =
[IOS_DEFAULT] = {{"alloc"}, {"free"}, {"read"}, {"write"}, {"read"},
{"write"}, {NULL}},
[IOS_LATENCY] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
{"write"}, {"read"}, {"write"}, {"wait"}, {"wait"}, {NULL}},
{"write"}, {"read"}, {"write"}, {"wait"}, {"wait"}, {"wait"},
{NULL}},
[IOS_QUEUES] = {{"pend"}, {"activ"}, {"pend"}, {"activ"}, {"pend"},
{"activ"}, {"pend"}, {"activ"}, {"pend"}, {"activ"},
{"pend"}, {"activ"}, {NULL}},
{"pend"}, {"activ"}, {"pend"}, {"activ"}, {NULL}},
[IOS_L_HISTO] = {{"read"}, {"write"}, {"read"}, {"write"}, {"read"},
{"write"}, {"read"}, {"write"}, {"scrub"}, {"trim"}, {NULL}},
{"write"}, {"read"}, {"write"}, {"scrub"}, {"trim"}, {"rebuild"},
{NULL}},
[IOS_RQ_HISTO] = {{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"},
{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"}, {NULL}},
{"ind"}, {"agg"}, {"ind"}, {"agg"}, {"ind"}, {"agg"},
{"ind"}, {"agg"}, {NULL}},
};
static const char *histo_to_title[] = {
@ -4507,6 +4550,8 @@ print_iostat_queues(iostat_cbdata_t *cb, nvlist_t *oldnv,
ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_TRIM_PEND_QUEUE,
ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE,
ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE,
ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE,
};
struct stat_array *nva;
@ -4546,6 +4591,7 @@ print_iostat_latency(iostat_cbdata_t *cb, nvlist_t *oldnv,
ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO,
ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO,
ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO,
ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO,
};
struct stat_array *nva;
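
With these nvlist keys wired up, the rebuild columns ride along with the existing zpool-iostat flags; a quick usage sketch (the pool name is hypothetical):

	# Queue depths, including the new rebuildq_write columns:
	zpool iostat -q tank 5
	# Per-queue latencies, including the new rebuild wait column:
	zpool iostat -l tank 5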


@ -411,6 +411,7 @@ print_vdev_latency_stats(nvlist_t *nvroot, const char *pool_name,
#ifdef ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO
{ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO, "trim", 0},
#endif
{ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO, "rebuild", 0},
{NULL, NULL}
};
@ -506,6 +507,8 @@ print_vdev_size_stats(nvlist_t *nvroot, const char *pool_name,
{ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO, "trim_write_ind"},
{ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO, "trim_write_agg"},
#endif
{ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO, "rebuild_write_ind"},
{ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO, "rebuild_write_agg"},
{NULL, NULL}
};
@ -585,11 +588,13 @@ print_queue_stats(nvlist_t *nvroot, const char *pool_name,
{ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active"},
{ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active"},
{ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active"},
{ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active"},
{ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend"},
{ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend"},
{ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend"},
{ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend"},
{ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend"},
{ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend"},
{NULL, NULL}
};
@ -636,11 +641,13 @@ print_top_level_vdev_stats(nvlist_t *nvroot, const char *pool_name)
{ZPOOL_CONFIG_VDEV_ASYNC_R_ACTIVE_QUEUE, "async_r_active_queue"},
{ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE, "async_w_active_queue"},
{ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE, "async_scrub_active_queue"},
{ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE, "rebuild_active_queue"},
{ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE, "sync_r_pend_queue"},
{ZPOOL_CONFIG_VDEV_SYNC_W_PEND_QUEUE, "sync_w_pend_queue"},
{ZPOOL_CONFIG_VDEV_ASYNC_R_PEND_QUEUE, "async_r_pend_queue"},
{ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE, "async_w_pend_queue"},
{ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE, "async_scrub_pend_queue"},
{ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE, "rebuild_pend_queue"},
{NULL, NULL}
};


@ -120,7 +120,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLKDEV_BDEV_CHECK_MEDIA_CHANGE], [
])
AC_DEFUN([ZFS_AC_KERNEL_BLKDEV_BDEV_CHECK_MEDIA_CHANGE], [
AC_MSG_CHECKING([whether bdev_disk_changed() exists])
AC_MSG_CHECKING([whether bdev_check_media_change() exists])
ZFS_LINUX_TEST_RESULT([bdev_check_media_change], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BDEV_CHECK_MEDIA_CHANGE, 1,


@ -643,6 +643,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_W_ACTIVE_QUEUE "vdev_async_w_active_queue"
#define ZPOOL_CONFIG_VDEV_SCRUB_ACTIVE_QUEUE "vdev_async_scrub_active_queue"
#define ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE "vdev_async_trim_active_queue"
#define ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE "vdev_rebuild_active_queue"
/* Queue sizes */
#define ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE "vdev_sync_r_pend_queue"
@ -651,6 +652,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_W_PEND_QUEUE "vdev_async_w_pend_queue"
#define ZPOOL_CONFIG_VDEV_SCRUB_PEND_QUEUE "vdev_async_scrub_pend_queue"
#define ZPOOL_CONFIG_VDEV_TRIM_PEND_QUEUE "vdev_async_trim_pend_queue"
#define ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE "vdev_rebuild_pend_queue"
/* Latency read/write histogram stats */
#define ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO "vdev_tot_r_lat_histo"
@ -663,6 +665,7 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_W_LAT_HISTO "vdev_async_w_lat_histo"
#define ZPOOL_CONFIG_VDEV_SCRUB_LAT_HISTO "vdev_scrub_histo"
#define ZPOOL_CONFIG_VDEV_TRIM_LAT_HISTO "vdev_trim_histo"
#define ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO "vdev_rebuild_histo"
/* Request size histograms */
#define ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO "vdev_sync_ind_r_histo"
@ -671,12 +674,14 @@ typedef struct zpool_load_policy {
#define ZPOOL_CONFIG_VDEV_ASYNC_IND_W_HISTO "vdev_async_ind_w_histo"
#define ZPOOL_CONFIG_VDEV_IND_SCRUB_HISTO "vdev_ind_scrub_histo"
#define ZPOOL_CONFIG_VDEV_IND_TRIM_HISTO "vdev_ind_trim_histo"
#define ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO "vdev_ind_rebuild_histo"
#define ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO "vdev_sync_agg_r_histo"
#define ZPOOL_CONFIG_VDEV_SYNC_AGG_W_HISTO "vdev_sync_agg_w_histo"
#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_R_HISTO "vdev_async_agg_r_histo"
#define ZPOOL_CONFIG_VDEV_ASYNC_AGG_W_HISTO "vdev_async_agg_w_histo"
#define ZPOOL_CONFIG_VDEV_AGG_SCRUB_HISTO "vdev_agg_scrub_histo"
#define ZPOOL_CONFIG_VDEV_AGG_TRIM_HISTO "vdev_agg_trim_histo"
#define ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO "vdev_agg_rebuild_histo"
/* Number of slow IOs */
#define ZPOOL_CONFIG_VDEV_SLOW_IOS "vdev_slow_ios"


@ -56,21 +56,24 @@ typedef struct zfs_zstd_header {
/*
* Version and compression level
* We use a union to be able to big endian encode a single 32 bit
* unsigned integer, but still access the individual bitmasked
* components easily.
* We used to use a union to reference compression level
* and version easily, but as it turns out, relying on the
* ordering of bitfields is not remotely portable.
* So now we have get/set functions below for
* manipulating this in just the right way forever.
*/
union {
uint32_t raw_version_level;
struct {
uint32_t version : 24;
uint8_t level;
};
};
uint32_t raw_version_level;
char data[];
} zfs_zstdhdr_t;
/*
* Simple struct to pass the data from raw_version_level around.
*/
typedef struct zfs_zstd_meta {
uint8_t level;
uint32_t version;
} zfs_zstdmeta_t;
/*
* kstat helper macros
*/
@ -94,6 +97,129 @@ int zfs_zstd_decompress(void *s_start, void *d_start, size_t s_len,
size_t d_len, int n);
void zfs_zstd_cache_reap_now(void);
/*
* So, the reason we have all these complicated set/get functions is that
* originally, in the zstd "header" we wrote out to disk, we used a 32-bit
* bitfield to store the "level" (8 bits) and "version" (24 bits).
*
* Unfortunately, bitfields make few promises about how they're arranged in
* memory...
*
* By way of example, if we were using version 1.4.5 and level 3, it'd be
* level = 0x03, version = 10405/0x0028A5, which gets broken into Vhigh = 0x00,
* Vmid = 0x28, Vlow = 0xA5. We include these positions below to help follow
* which data winds up where.
*
* As a consequence, we wound up with little endian platforms with a layout
* like this in memory:
*
* 0 8 16 24 32
* +-------+-------+-------+-------+
* | Vlow | Vmid | Vhigh | level |
* +-------+-------+-------+-------+
* =A5 =28 =00 =03
*
* ...and then, after being run through BE_32(), serializing this out to
* disk:
*
* 0 8 16 24 32
* +-------+-------+-------+-------+
* | level | Vhigh | Vmid | Vlow |
* +-------+-------+-------+-------+
* =03 =00 =28 =A5
*
* while on big-endian systems, since BE_32() is a noop there, both in
* memory and on disk, we wind up with:
*
* 0 8 16 24 32
* +-------+-------+-------+-------+
* | Vhigh | Vmid | Vlow | level |
* +-------+-------+-------+-------+
* =00 =28 =A5 =03
*
* (Vhigh is always 0 until version exceeds 6.55.35. Vmid and Vlow are the
* other two bytes of the "version" data.)
*
* So now we use the BF32_SET macros to get consistent behavior (the
* on-disk LE encoding, since x86 currently rules the world) across
* platforms, but the "get" behavior requires that we check each of the
* bytes in the aforementioned former-bitfield for 0x00, and from there,
* we can know which possible layout we're dealing with. (Only the two
* that have been observed in the wild are illustrated above, but handlers
* for all 4 positions of 0x00 are implemented.)
*/
static inline void
zfs_get_hdrmeta(const zfs_zstdhdr_t *blob, zfs_zstdmeta_t *res)
{
uint32_t raw = blob->raw_version_level;
uint8_t findme = 0xff;
int shift;
for (shift = 0; shift < 4; shift++) {
findme = BF32_GET(raw, 8*shift, 8);
if (findme == 0)
break;
}
switch (shift) {
case 0:
res->level = BF32_GET(raw, 24, 8);
res->version = BSWAP_32(raw);
res->version = BF32_GET(res->version, 8, 24);
break;
case 1:
res->level = BF32_GET(raw, 0, 8);
res->version = BSWAP_32(raw);
res->version = BF32_GET(res->version, 0, 24);
break;
case 2:
res->level = BF32_GET(raw, 24, 8);
res->version = BF32_GET(raw, 0, 24);
break;
case 3:
res->level = BF32_GET(raw, 0, 8);
res->version = BF32_GET(raw, 8, 24);
break;
default:
res->level = 0;
res->version = 0;
break;
}
}
static inline uint8_t
zfs_get_hdrlevel(const zfs_zstdhdr_t *blob)
{
uint8_t level = 0;
zfs_zstdmeta_t res;
zfs_get_hdrmeta(blob, &res);
level = res.level;
return (level);
}
static inline uint32_t
zfs_get_hdrversion(const zfs_zstdhdr_t *blob)
{
uint32_t version = 0;
zfs_zstdmeta_t res;
zfs_get_hdrmeta(blob, &res);
version = res.version;
return (version);
}
static inline void
zfs_set_hdrversion(zfs_zstdhdr_t *blob, uint32_t version)
{
BF32_SET(blob->raw_version_level, 0, 24, version);
}
static inline void
zfs_set_hdrlevel(zfs_zstdhdr_t *blob, uint8_t level)
{
BF32_SET(blob->raw_version_level, 24, 8, level);
}
#ifdef __cplusplus
}
#endif
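
To sanity-check the layout table above, here is a hedged standalone sketch (the host stand-in for the kernel's BF32_GET macro is an assumption) that walks the worked example back from its on-disk bytes, the same way zfs_get_hdrmeta() does after the BE_32() swap:

#include <stdint.h>
#include <stdio.h>

/* Host stand-in for the kernel's BF32_GET bitfield macro (assumption). */
#define	BF32_GET(x, low, len)	(((x) >> (low)) & ((1u << (len)) - 1u))

int
main(void)
{
	/*
	 * On-disk bytes from the worked example: 03 00 28 A5. After the
	 * BE_32() swap in the decompress path, the value is 0x030028A5.
	 */
	uint32_t raw = 0x030028A5;
	int shift;

	/* Scan for the 0x00 byte to identify the layout (here: shift 2). */
	for (shift = 0; shift < 4; shift++)
		if (BF32_GET(raw, 8 * shift, 8) == 0)
			break;

	/* shift == 2 is the [Vhigh Vmid Vlow level] case. */
	printf("level=%u version=%u\n",
	    BF32_GET(raw, 24, 8), BF32_GET(raw, 0, 24));	/* 3, 10405 */
	return (0);
}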


@ -201,6 +201,9 @@ Does not include disk time.
.It Sy scrub
Amount of time I/O spent in scrub queue.
Does not include disk time.
.It Sy rebuild
Amount of time I/O spent in rebuild queue.
Does not include disk time.
.El
.It Fl l
Include average latency statistics:
@ -221,6 +224,9 @@ Does not include disk time.
.It Sy trim
Average queuing time in trim queue.
Does not include disk time.
.It Sy rebuild
Average queuing time in rebuild queue.
Does not include disk time.
.El
.It Fl q
Include active queue statistics.
@ -242,6 +248,8 @@ Current number of entries in asynchronous priority queues.
Current number of entries in scrub queue.
.It Sy trimq_write
Current number of entries in trim queue.
.It Sy rebuildq_write
Current number of entries in rebuild queue.
.El
.Pp
All queue statistics are instantaneous measurements of the number of


@ -52,10 +52,10 @@
Removes the specified device from the pool.
This command supports removing hot spare, cache, log, and both mirrored and
non-redundant primary top-level vdevs, including dedup and special vdevs.
When the primary pool storage includes a top-level raidz vdev only hot spare,
cache, and log devices can be removed.
Note that keys for all encrypted datasets must be loaded for top-level vdevs
to be removed.
.Pp
Top-level vdevs can only be removed if the primary pool storage does not contain
a top-level raidz vdev, all top-level vdevs have the same sector size, and the
keys for all encrypted datasets are loaded.
.Pp
Removing a top-level vdev reduces the total amount of space in the storage pool.
The specified device will be evacuated by copying all allocated space from it to


@ -5222,6 +5222,11 @@ zfs_freebsd_pathconf(struct vop_pathconf_args *ap)
case _PC_NAME_MAX:
*ap->a_retval = NAME_MAX;
return (0);
#if __FreeBSD_version >= 1400032
case _PC_DEALLOC_PRESENT:
*ap->a_retval = 1;
return (0);
#endif
case _PC_PIPE_BUF:
if (ap->a_vp->v_type == VDIR || ap->a_vp->v_type == VFIFO) {
*ap->a_retval = PIPE_BUF;
@ -5690,7 +5695,7 @@ zfs_setextattr(struct vop_setextattr_args *ap)
}
if (error) {
error = zfs_setextattr_dir(ap, attrname);
if (error == 0)
if (error == 0 && zp->z_is_sa)
/*
* Successfully put into dir, we need to clear the one
* in SA if present.
@ -6057,6 +6062,55 @@ zfs_vptocnp(struct vop_vptocnp_args *ap)
return (error);
}
#if __FreeBSD_version >= 1400032
static int
zfs_deallocate(struct vop_deallocate_args *ap)
{
znode_t *zp = VTOZ(ap->a_vp);
zfsvfs_t *zfsvfs = zp->z_zfsvfs;
zilog_t *zilog;
off_t off, len, file_sz;
int error;
ZFS_ENTER(zfsvfs);
ZFS_VERIFY_ZP(zp);
/*
* Callers might not be able to detect properly that we are read-only,
* so check it explicitly here.
*/
if (zfs_is_readonly(zfsvfs)) {
ZFS_EXIT(zfsvfs);
return (SET_ERROR(EROFS));
}
zilog = zfsvfs->z_log;
off = *ap->a_offset;
len = *ap->a_len;
file_sz = zp->z_size;
if (off + len > file_sz)
len = file_sz - off;
/* Fast path for out-of-range request. */
if (len <= 0) {
*ap->a_len = 0;
ZFS_EXIT(zfsvfs);
return (0);
}
error = zfs_freesp(zp, off, len, O_RDWR, TRUE);
if (error == 0) {
if (zfsvfs->z_os->os_sync == ZFS_SYNC_ALWAYS ||
(ap->a_ioflag & IO_SYNC) != 0)
zil_commit(zilog, zp->z_id);
*ap->a_offset = off + len;
*ap->a_len = 0;
}
ZFS_EXIT(zfsvfs);
return (error);
}
#endif
struct vop_vector zfs_vnodeops;
struct vop_vector zfs_fifoops;
struct vop_vector zfs_shareops;
@ -6076,6 +6130,9 @@ struct vop_vector zfs_vnodeops = {
#endif
.vop_access = zfs_freebsd_access,
.vop_allocate = VOP_EINVAL,
#if __FreeBSD_version >= 1400032
.vop_deallocate = zfs_deallocate,
#endif
.vop_lookup = zfs_cache_lookup,
.vop_cachedlookup = zfs_freebsd_cachedlookup,
.vop_getattr = zfs_freebsd_getattr,
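
From userland, the new vop_deallocate entry is reached through fspacectl(2); a minimal sketch (the file path is hypothetical) of the call the punch-hole tests below depend on:

#include <sys/types.h>
#include <fcntl.h>
#include <err.h>
#include <unistd.h>

int
main(void)
{
	/* /tank/testfile is a hypothetical path on a ZFS dataset. */
	int fd = open("/tank/testfile", O_WRONLY);
	if (fd == -1)
		err(1, "open");

	/* Deallocate (punch a hole over) the first 128 KiB. */
	struct spacectl_range range = { .r_offset = 0, .r_len = 131072 };
	if (fspacectl(fd, SPACECTL_DEALLOC, &range, 0, &range) == -1)
		err(1, "fspacectl");

	close(fd);
	return (0);
}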


@ -1481,12 +1481,16 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
error = dmu_free_long_range(zfsvfs->z_os, zp->z_id, off, len);
if (error == 0) {
#if __FreeBSD_version >= 1400032
vnode_pager_purge_range(ZTOV(zp), off, off + len);
#else
/*
* In FreeBSD we cannot free block in the middle of a file,
* but only at the end of a file, so this code path should
* never happen.
* Before __FreeBSD_version 1400032 we cannot free a block in the
* middle of a file, but only at the end of a file, so this code
* path should never happen.
*/
vnode_pager_setsize(ZTOV(zp), off);
#endif
}
zfs_rangelock_exit(lr);


@ -4583,13 +4583,10 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
* ZIO_PRIORITY_ASYNC_READ,
* ZIO_PRIORITY_ASYNC_WRITE,
* ZIO_PRIORITY_SCRUB,
* ZIO_PRIORITY_TRIM.
* ZIO_PRIORITY_TRIM,
* ZIO_PRIORITY_REBUILD.
*/
if (priority == ZIO_PRIORITY_REBUILD) {
priority = ((type == ZIO_TYPE_WRITE) ?
ZIO_PRIORITY_ASYNC_WRITE :
ZIO_PRIORITY_SCRUB);
} else if (priority == ZIO_PRIORITY_INITIALIZING) {
if (priority == ZIO_PRIORITY_INITIALIZING) {
ASSERT3U(type, ==, ZIO_TYPE_WRITE);
priority = ZIO_PRIORITY_ASYNC_WRITE;
} else if (priority == ZIO_PRIORITY_REMOVAL) {


@ -256,6 +256,9 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_TRIM_ACTIVE_QUEUE,
vsx->vsx_active_queue[ZIO_PRIORITY_TRIM]);
fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_REBUILD_ACTIVE_QUEUE,
vsx->vsx_active_queue[ZIO_PRIORITY_REBUILD]);
/* ZIOs pending */
fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SYNC_R_PEND_QUEUE,
vsx->vsx_pend_queue[ZIO_PRIORITY_SYNC_READ]);
@ -275,6 +278,9 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_TRIM_PEND_QUEUE,
vsx->vsx_pend_queue[ZIO_PRIORITY_TRIM]);
fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_REBUILD_PEND_QUEUE,
vsx->vsx_pend_queue[ZIO_PRIORITY_REBUILD]);
/* Histograms */
fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_TOT_R_LAT_HISTO,
vsx->vsx_total_histo[ZIO_TYPE_READ],
@ -316,6 +322,10 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
vsx->vsx_queue_histo[ZIO_PRIORITY_TRIM],
ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_TRIM]));
fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_REBUILD_LAT_HISTO,
vsx->vsx_queue_histo[ZIO_PRIORITY_REBUILD],
ARRAY_SIZE(vsx->vsx_queue_histo[ZIO_PRIORITY_REBUILD]));
/* Request sizes */
fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_IND_R_HISTO,
vsx->vsx_ind_histo[ZIO_PRIORITY_SYNC_READ],
@ -341,6 +351,10 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
vsx->vsx_ind_histo[ZIO_PRIORITY_TRIM],
ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_TRIM]));
fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_IND_REBUILD_HISTO,
vsx->vsx_ind_histo[ZIO_PRIORITY_REBUILD],
ARRAY_SIZE(vsx->vsx_ind_histo[ZIO_PRIORITY_REBUILD]));
fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_SYNC_AGG_R_HISTO,
vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ],
ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_SYNC_READ]));
@ -365,6 +379,10 @@ vdev_config_generate_stats(vdev_t *vd, nvlist_t *nv)
vsx->vsx_agg_histo[ZIO_PRIORITY_TRIM],
ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_TRIM]));
fnvlist_add_uint64_array(nvx, ZPOOL_CONFIG_VDEV_AGG_REBUILD_HISTO,
vsx->vsx_agg_histo[ZIO_PRIORITY_REBUILD],
ARRAY_SIZE(vsx->vsx_agg_histo[ZIO_PRIORITY_REBUILD]));
/* IO delays */
fnvlist_add_uint64(nvx, ZPOOL_CONFIG_VDEV_SLOW_IOS, vs->vs_slow_ios);


@ -465,6 +465,7 @@ benchmark_raidz(void)
raidz_supp_impl_cnt = c; /* number of supported impl */
#if defined(_KERNEL)
abd_t *pabd;
zio_t *bench_zio = NULL;
raidz_map_t *bench_rm = NULL;
uint64_t bench_parity;
@ -492,6 +493,12 @@ benchmark_raidz(void)
bench_rm = vdev_raidz_map_alloc(bench_zio, SPA_MINBLOCKSHIFT,
BENCH_COLS, PARITY_PQR);
/* Ensure that fake parity blocks are initialized */
for (c = 0; c < bench_rm->rm_row[0]->rr_firstdatacol; c++) {
pabd = bench_rm->rm_row[0]->rr_col[c].rc_abd;
memset(abd_to_buf(pabd), 0xAA, abd_get_size(pabd));
}
for (int fn = 0; fn < RAIDZ_REC_NUM; fn++)
benchmark_raidz_impl(bench_rm, fn, benchmark_rec_impl);


@ -33,6 +33,7 @@ $(obj)/zfs_zstd.o: c_flags += -include $(zstd_include)/zstd_compat_wrapper.h
$(MODULE)-objs += zfs_zstd.o
$(MODULE)-objs += lib/zstd.o
$(MODULE)-objs += zstd_sparc.o
all:
mkdir -p lib


@ -0,0 +1,4 @@
#if defined(__sparc)
uint64_t __bswapdi2(uint64_t in);
uint32_t __bswapsi2(uint32_t in);
#endif


@ -380,6 +380,7 @@ zstd_enum_to_level(enum zio_zstd_levels level, int16_t *zstd_level)
return (1);
}
/* Compress block using zstd */
size_t
zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
@ -477,8 +478,8 @@ zfs_zstd_compress(void *s_start, void *d_start, size_t s_len, size_t d_len,
* As soon as such incompatibility occurs, handling code needs to be
* added, differentiating between the versions.
*/
hdr->version = ZSTD_VERSION_NUMBER;
hdr->level = level;
zfs_set_hdrversion(hdr, ZSTD_VERSION_NUMBER);
zfs_set_hdrlevel(hdr, level);
hdr->raw_version_level = BE_32(hdr->raw_version_level);
return (c_len + sizeof (*hdr));
@ -504,6 +505,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
* not modify the original data that may be used again later.
*/
hdr_copy.raw_version_level = BE_32(hdr->raw_version_level);
uint8_t curlevel = zfs_get_hdrlevel(&hdr_copy);
/*
* NOTE: We ignore the ZSTD version for now. As soon as any
@ -516,13 +518,13 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
* An invalid level is a strong indicator for data corruption! In such
* case return an error so the upper layers can try to fix it.
*/
if (zstd_enum_to_level(hdr_copy.level, &zstd_level)) {
if (zstd_enum_to_level(curlevel, &zstd_level)) {
ZSTDSTAT_BUMP(zstd_stat_dec_inval);
return (1);
}
ASSERT3U(d_len, >=, s_len);
ASSERT3U(hdr_copy.level, !=, ZIO_COMPLEVEL_INHERIT);
ASSERT3U(curlevel, !=, ZIO_COMPLEVEL_INHERIT);
/* Invalid compressed buffer size encoded at start */
if (c_len + sizeof (*hdr) > s_len) {
@ -553,7 +555,7 @@ zfs_zstd_decompress_level(void *s_start, void *d_start, size_t s_len,
}
if (level) {
*level = hdr_copy.level;
*level = curlevel;
}
return (0);
@ -790,7 +792,7 @@ module_exit(zstd_fini);
ZFS_MODULE_DESCRIPTION("ZSTD Compression for ZFS");
ZFS_MODULE_LICENSE("Dual BSD/GPL");
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING);
ZFS_MODULE_VERSION(ZSTD_VERSION_STRING "a");
EXPORT_SYMBOL(zfs_zstd_compress);
EXPORT_SYMBOL(zfs_zstd_decompress_level);


@ -0,0 +1,11 @@
#ifdef __sparc__
#include <stdint.h>
#include <sys/byteorder.h>
#include "include/sparc_compat.h"
uint64_t __bswapdi2(uint64_t in) {
return (BSWAP_64(in));
}
uint32_t __bswapsi2(uint32_t in) {
return (BSWAP_32(in));
}
#endif


@ -603,6 +603,10 @@ tags = ['functional', 'delegate']
tests = ['exec_001_pos', 'exec_002_neg']
tags = ['functional', 'exec']
[tests/functional/fallocate]
tests = ['fallocate_punch-hole']
tags = ['functional', 'fallocate']
[tests/functional/features/async_destroy]
tests = ['async_destroy_001_pos']
tags = ['functional', 'features', 'async_destroy']


@ -94,7 +94,7 @@ tests = ['events_001_pos', 'events_002_pos', 'zed_rc_filter', 'zed_fd_spill']
tags = ['functional', 'events']
[tests/functional/fallocate:Linux]
tests = ['fallocate_prealloc', 'fallocate_punch-hole']
tests = ['fallocate_prealloc']
tags = ['functional', 'fallocate']
[tests/functional/fault:Linux]


@ -126,6 +126,13 @@ fio_reason = 'Fio v2.3 or newer required'
#
trim_reason = 'DISKS must support discard (TRIM/UNMAP)'
#
# Some tests on FreeBSD require the fspacectl(2) system call and the
# truncate(1) utility supporting the -d option. The system call was first
# introduced at __FreeBSD_version 1400032.
#
fspacectl_reason = 'fspacectl(2) and truncate -d support required'
#
# Some tests are not applicable to a platform or need to be updated to operate
# in the manner required by the platform. Any tests which are skipped for this
@ -224,6 +231,7 @@ maybe = {
'cli_root/zpool_trim/setup': ['SKIP', trim_reason],
'cli_root/zpool_upgrade/zpool_upgrade_004_pos': ['FAIL', '6141'],
'delegate/setup': ['SKIP', exec_reason],
'fallocate/fallocate_punch-hole': ['SKIP', fspacectl_reason],
'history/history_004_pos': ['FAIL', '7026'],
'history/history_005_neg': ['FAIL', '6680'],
'history/history_006_neg': ['FAIL', '5657'],


@ -93,7 +93,7 @@ function block_device_wait
typeset missing=false
typeset dev
for dev in "${@}"; do
if ! [[ -f $dev ]]; then
if ! [[ -e $dev ]]; then
missing=true
break
fi


@ -27,6 +27,7 @@
# Copyright (c) 2017, Lawrence Livermore National Security LLC.
# Copyright (c) 2017, Datto Inc. All rights reserved.
# Copyright (c) 2017, Open-E Inc. All rights reserved.
# Copyright (c) 2021, The FreeBSD Foundation.
# Use is subject to license terms.
#
@ -4194,6 +4195,25 @@ function get_arcstat # stat
esac
}
function punch_hole # offset length file
{
typeset offset=$1
typeset length=$2
typeset file=$3
case $(uname) in
FreeBSD)
truncate -d -o $offset -l $length "$file"
;;
Linux)
fallocate --punch-hole --offset $offset --length $length "$file"
;;
*)
false
;;
esac
}
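
Usage sketch (the file path is hypothetical): punch a 128 KiB hole at the start of a file, portably across both platforms:

	punch_hole 0 131072 /tank/testfile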
#
# Wait for the specified arcstat to reach non-zero quiescence.
# If echo is 1 echo the value after reaching quiescence, otherwise


@ -87,7 +87,7 @@ function do_vol_test
log_must zfs create -V $VOLSIZE -o copies=$copies $vol
log_must zfs set refreservation=none $vol
block_device_wait
block_device_wait $vol_r_path
case "$type" in
"ext2")


@ -117,26 +117,26 @@ log_must diff $SRC_FILE $obj
if is_global_zone; then
vol=$TESTPOOL/$TESTFS/vol.$$ ; volclone=$TESTPOOL/$TESTFS/volclone.$$
log_must zfs create -V 100M $vol
block_device_wait
obj=$(target_obj $vol)
block_device_wait $obj
log_must dd if=$SRC_FILE of=$obj bs=$BS count=$CNT
snap=${vol}@snap.$$
log_must zfs snapshot $snap
log_must zfs clone $snap $volclone
block_device_wait
# Rename dataset & clone
log_must zfs rename $vol ${vol}-new
log_must zfs rename $volclone ${volclone}-new
block_device_wait
# Compare source file and target file
obj=$(target_obj ${vol}-new)
block_device_wait $obj
log_must dd if=$obj of=$DST_FILE bs=$BS count=$CNT
log_must diff $SRC_FILE $DST_FILE
obj=$(target_obj ${volclone}-new)
block_device_wait $obj
log_must dd if=$obj of=$DST_FILE bs=$BS count=$CNT
log_must diff $SRC_FILE $DST_FILE
@ -144,10 +144,10 @@ if is_global_zone; then
log_must zfs rename ${vol}-new $vol
log_must zfs rename $snap ${snap}-new
log_must zfs clone ${snap}-new $volclone
block_device_wait
# Compare source file and target file
obj=$(target_obj $volclone)
block_device_wait $obj
log_must dd if=$obj of=$DST_FILE bs=$BS count=$CNT
log_must diff $SRC_FILE $DST_FILE
fi


@ -22,13 +22,14 @@
#
# Copyright (c) 2020 by Lawrence Livermore National Security, LLC.
# Copyright (c) 2021 by The FreeBSD Foundation.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Test `fallocate --punch-hole`
# Test hole-punching functionality
#
# STRATEGY:
# 1. Create a dense file
@ -37,6 +38,20 @@
verify_runnable "global"
#
# Prior to __FreeBSD_version 1400032 there is no mechanism to punch a hole in
# a file on FreeBSD. truncate(1) -d support is required so the script can
# invoke fspacectl(2) on its behalf.
#
if is_freebsd; then
if [[ $(uname -K) -lt 1400032 ]]; then
log_unsupported "Requires fspacectl(2) support on FreeBSD"
fi
if truncate -d 2>&1 | grep "illegal option" > /dev/null; then
log_unsupported "Requires truncate(1) -d support on FreeBSD"
fi
fi
FILE=$TESTDIR/$TESTFILE0
BLKSZ=$(get_prop recordsize $TESTPOOL)
@ -74,23 +89,21 @@ log_must file_write -o create -f $FILE -b $BLKSZ -c 8
log_must check_disk_size $((131072 * 8))
# Punch a hole for the first full block.
log_must fallocate --punch-hole --offset 0 --length $BLKSZ $FILE
log_must punch_hole 0 $BLKSZ $FILE
log_must check_disk_size $((131072 * 7))
# Partially punch a hole in the second block.
log_must fallocate --punch-hole --offset $BLKSZ --length $((BLKSZ / 2)) $FILE
log_must punch_hole $BLKSZ $((BLKSZ / 2)) $FILE
log_must check_disk_size $((131072 * 7))
# Punch a hole which overlaps the third and forth block.
log_must fallocate --punch-hole --offset $(((BLKSZ * 2) + (BLKSZ / 2))) \
--length $((BLKSZ)) $FILE
log_must punch_hole $(((BLKSZ * 2) + (BLKSZ / 2))) $((BLKSZ)) $FILE
log_must check_disk_size $((131072 * 7))
# Punch a hole from the fifth block past the end of file. The apparent
# file size should not change since --keep-size is implied.
apparent_size=$(stat_size $FILE)
log_must fallocate --punch-hole --offset $((BLKSZ * 4)) \
--length $((BLKSZ * 10)) $FILE
log_must punch_hole $((BLKSZ * 4)) $((BLKSZ * 10)) $FILE
log_must check_disk_size $((131072 * 4))
log_must check_apparent_size $apparent_size


@ -52,7 +52,7 @@ log_must eval "bzcat <$sendfile_compressed >$sendfile"
log_must eval "zstream redup $sendfile | zfs recv $TESTPOOL/recv"
log_must zfs load-key $TESTPOOL/recv
block_device_wait
block_device_wait $volfile
log_must eval "bzcat <$volfile_compressed >$volfile"
log_must diff $volfile $recvdev


@ -65,7 +65,7 @@ for compress in "${compress_prop_vals[@]}"; do
datasetexists $send_vol && log_must_busy zfs destroy -r $send_vol
log_must zfs create -o compress=$compress $send_ds
log_must zfs create -V 1g -o compress=$compress $send_vol
block_device_wait
block_device_wait $send_voldev
typeset dir=$(get_prop mountpoint $send_ds)
log_must cp $file $dir


@ -62,7 +62,7 @@ for vbs in 8192 16384 32768 65536 131072; do
# Create a sparse volume to test larger sizes
log_must zfs create -s -b $vbs -V $volsize $vol
block_device_wait
block_device_wait $swapname
log_must swap_setup $swapname
new_volsize=$(get_prop volsize $vol)


@ -809,7 +809,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g8ae86e2ed"
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g3b89d9518"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -839,7 +839,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "FreeBSD_g8ae86e2ed"
#define ZFS_META_RELEASE "FreeBSD_g3b89d9518"
/* Define the project version. */
#define ZFS_META_VERSION "2.1.99"


@ -2,4 +2,4 @@
* $FreeBSD$
*/
#define ZFS_META_GITREV "zfs-2.1.99-419-g8ae86e2ed"
#define ZFS_META_GITREV "zfs-2.1.99-430-g3b89d9518"