zfs: merge openzfs/zfs@07a4c76e9 (master) into main

Notable upstream pull request merges:
  #12299 file reference counts can get corrupted
  #12320 FreeBSD: Use unmapped I/O for scattered/gang ABD buffers

Obtained from:	OpenZFS
OpenZFS commit:	07a4c76e90
This commit is contained in:
Martin Matuska 2021-07-12 23:23:38 +02:00
commit 5eb61f6c65
16 changed files with 220 additions and 129 deletions

View File

@ -2,7 +2,7 @@
name: Bug report
about: Create a report to help us improve OpenZFS
title: ''
labels: 'Type: Defect, Status: Triage Needed'
labels: 'Type: Defect'
assignees: ''
---
@ -25,14 +25,16 @@ Type | Version/Name
--- | ---
Distribution Name |
Distribution Version |
Linux Kernel |
Kernel Version |
Architecture |
ZFS Version |
SPL Version |
OpenZFS Version |
<!--
Commands to find ZFS/SPL versions:
modinfo zfs | grep -iw version
modinfo spl | grep -iw version
Command to find OpenZFS version:
zfs version
Commands to find kernel version:
uname -r # Linux
freebsd-version -r # FreeBSD
-->
### Describe the problem you're observing

View File

@ -465,7 +465,7 @@ dbuf_find_dirty_eq(dmu_buf_impl_t *db, uint64_t txg)
char __db_buf[32]; \
uint64_t __db_obj = (dbuf)->db.db_object; \
if (__db_obj == DMU_META_DNODE_OBJECT) \
(void) strcpy(__db_buf, "mdn"); \
(void) strlcpy(__db_buf, "mdn", sizeof (__db_buf)); \
else \
(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
(u_longlong_t)__db_obj); \

View File

@ -600,7 +600,7 @@ extern dnode_stats_t dnode_stats;
char __db_buf[32]; \
uint64_t __db_obj = (dn)->dn_object; \
if (__db_obj == DMU_META_DNODE_OBJECT) \
(void) strcpy(__db_buf, "mdn"); \
(void) strlcpy(__db_buf, "mdn", sizeof (__db_buf)); \
else \
(void) snprintf(__db_buf, sizeof (__db_buf), "%lld", \
(u_longlong_t)__db_obj);\

View File

@ -31,6 +31,7 @@ extern "C" {
#endif
#include <sys/nvpair.h>
#include <sys/zfs_file.h>
/*
* Shared user/kernel definitions for class length, error channel name,
@ -95,8 +96,8 @@ extern void fm_fini(void);
extern void zfs_zevent_post_cb(nvlist_t *nvl, nvlist_t *detector);
extern int zfs_zevent_post(nvlist_t *, nvlist_t *, zevent_cb_t *);
extern void zfs_zevent_drain_all(int *);
extern int zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
extern void zfs_zevent_fd_rele(int);
extern zfs_file_t *zfs_zevent_fd_hold(int, minor_t *, zfs_zevent_t **);
extern void zfs_zevent_fd_rele(zfs_file_t *);
extern int zfs_zevent_next(zfs_zevent_t *, nvlist_t **, uint64_t *, uint64_t *);
extern int zfs_zevent_wait(zfs_zevent_t *);
extern int zfs_zevent_seek(zfs_zevent_t *, uint64_t);

View File

@ -22,6 +22,8 @@
#ifndef _SYS_ZFS_FILE_H
#define _SYS_ZFS_FILE_H
#include <sys/zfs_context.h>
#ifndef _KERNEL
typedef struct zfs_file {
int f_fd;
@ -55,8 +57,8 @@ int zfs_file_fallocate(zfs_file_t *fp, int mode, loff_t offset, loff_t len);
loff_t zfs_file_off(zfs_file_t *fp);
int zfs_file_unlink(const char *);
int zfs_file_get(int fd, zfs_file_t **fp);
void zfs_file_put(int fd);
zfs_file_t *zfs_file_get(int fd);
void zfs_file_put(zfs_file_t *fp);
void *zfs_file_private(zfs_file_t *fp);
#endif /* _SYS_ZFS_FILE_H */

View File

@ -566,7 +566,7 @@ typedef struct zfsdev_state {
} zfsdev_state_t;
extern void *zfsdev_get_state(minor_t minor, enum zfsdev_state_type which);
extern int zfsdev_getminor(int fd, minor_t *minorp);
extern int zfsdev_getminor(zfs_file_t *fp, minor_t *minorp);
extern uint_t zfs_fsyncer_key;
extern uint_t zfs_allow_log_key;

View File

@ -51,8 +51,8 @@ extern void zfs_onexit_destroy(zfs_onexit_t *zo);
#endif
extern int zfs_onexit_fd_hold(int fd, minor_t *minorp);
extern void zfs_onexit_fd_rele(int fd);
extern zfs_file_t *zfs_onexit_fd_hold(int fd, minor_t *minorp);
extern void zfs_onexit_fd_rele(zfs_file_t *);
extern int zfs_onexit_add_cb(minor_t minor, void (*func)(void *), void *data,
uint64_t *action_handle);

View File

@ -4871,7 +4871,9 @@ zpool_load_compat(const char *compat, boolean_t *features, char *report,
line != NULL;
line = strtok_r(NULL, "\n", &ls)) {
/* discard comments */
*(strchrnul(line, '#')) = '\0';
char *r = strchr(line, '#');
if (r != NULL)
*r = '\0';
for (word = strtok_r(line, ", \t", &ws);
word != NULL;

View File

@ -928,16 +928,16 @@ kmem_asprintf(const char *fmt, ...)
}
/* ARGSUSED */
int
zfs_file_t *
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
*minorp = 0;
return (0);
return (NULL);
}
/* ARGSUSED */
void
zfs_onexit_fd_rele(int fd)
zfs_onexit_fd_rele(zfs_file_t *fp)
{
}
@ -1347,28 +1347,26 @@ zfs_file_unlink(const char *path)
* Get reference to file pointer
*
* fd - input file descriptor
* fpp - pointer to file pointer
*
* Returns 0 on success EBADF on failure.
* Returns pointer to file struct or NULL.
* Unsupported in user space.
*/
int
zfs_file_get(int fd, zfs_file_t **fpp)
zfs_file_t *
zfs_file_get(int fd)
{
abort();
return (EOPNOTSUPP);
return (NULL);
}
/*
* Drop reference to file pointer
*
* fd - input file descriptor
* fp - pointer to file struct
*
* Unsupported in user space.
*/
void
zfs_file_put(int fd)
zfs_file_put(zfs_file_t *fp)
{
abort();
}

View File

@ -29,6 +29,7 @@
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/file.h>
#include <sys/spa.h>
#include <sys/spa_impl.h>
@ -36,6 +37,7 @@
#include <sys/vdev_os.h>
#include <sys/fs/zfs.h>
#include <sys/zio.h>
#include <vm/vm_page.h>
#include <geom/geom.h>
#include <geom/geom_disk.h>
#include <geom/geom_int.h>
@ -1059,6 +1061,80 @@ vdev_geom_io_intr(struct bio *bp)
zio_delay_interrupt(zio);
}
/*
 * State carried through vdev_geom_check_unmapped_cb() while the segments
 * of an ABD are being walked.
 */
struct vdev_geom_check_unmapped_cb_state {
/* Running total of pages spanned by the segments visited so far. */
int pages;
/* In-page offset where the last visited segment ended; 0 if page-aligned. */
uint_t end;
};
/*
 * Per-segment callback: validate the segment's placement within its pages
 * and add the pages it spans to the running count.
 *
 * GEOM needs the data buffer to look virtually contiguous, so only the
 * very first segment may begin in the middle of a page and only the very
 * last one may finish in the middle of a page.  Every page in between
 * has to be completely covered.
 *
 * buf  - address of the segment
 * len  - length of the segment in bytes
 * priv - struct vdev_geom_check_unmapped_cb_state accumulator
 *
 * Returns 0 if the segment is acceptable, 1 if it violates the rule above.
 */
static int
vdev_geom_check_unmapped_cb(void *buf, size_t len, void *priv)
{
	struct vdev_geom_check_unmapped_cb_state *state = priv;
	vm_offset_t start = (vm_offset_t)buf & PAGE_MASK;

	/*
	 * Reject a segment that starts mid-page unless it is the first one,
	 * and reject any segment that follows one which ended mid-page.
	 */
	if ((state->pages != 0 && start != 0) || state->end != 0)
		return (1);

	state->pages += (start + len + PAGE_MASK) >> PAGE_SHIFT;
	state->end = (start + len) & PAGE_MASK;
	return (0);
}
/*
 * Decide whether this ZIO can be issued to the device as unmapped I/O,
 * which avoids copying a scattered and/or gang ABD into a linear buffer.
 *
 * zio - the I/O whose ABD (io_abd, io_size) is examined
 * cp  - GEOM consumer whose provider flags are consulted
 *
 * Returns the number of pages the buffer spans when unmapped I/O is
 * usable, or 0 when it is not (linear ABD, provider without
 * G_PF_ACCEPT_UNMAPPED, or a segment layout rejected by
 * vdev_geom_check_unmapped_cb()).
 */
static int
vdev_geom_check_unmapped(zio_t *zio, struct g_consumer *cp)
{
	struct vdev_geom_check_unmapped_cb_state walk = {
		.pages = 0,
		.end = 0
	};

	/*
	 * Nothing to do for an already linear buffer, and nothing we can do
	 * if the GEOM provider does not accept unmapped I/O: the data will
	 * have to be copied.
	 */
	if (abd_is_linear(zio->io_abd) ||
	    (cp->provider->flags & G_PF_ACCEPT_UNMAPPED) == 0)
		return (0);

	/* Validate every chunk's size/alignment while counting pages. */
	if (abd_iterate_func(zio->io_abd, 0, zio->io_size,
	    vdev_geom_check_unmapped_cb, &walk) != 0)
		return (0);

	return (walk.pages);
}
/*
* Callback to translate the ABD segment into array of physical pages.
*/
static int
vdev_geom_fill_unmap_cb(void *buf, size_t len, void *priv)
{
struct bio *bp = priv;
vm_offset_t addr = (vm_offset_t)buf;
vm_offset_t end = addr + len;
if (bp->bio_ma_n == 0)
bp->bio_ma_offset = addr & PAGE_MASK;
do {
bp->bio_ma[bp->bio_ma_n++] =
PHYS_TO_VM_PAGE(pmap_kextract(addr));
addr += PAGE_SIZE;
} while (addr < end);
return (0);
}
static void
vdev_geom_io_start(zio_t *zio)
{
@ -1123,14 +1199,34 @@ vdev_geom_io_start(zio_t *zio)
zio->io_target_timestamp = zio_handle_io_delay(zio);
bp->bio_offset = zio->io_offset;
bp->bio_length = zio->io_size;
if (zio->io_type == ZIO_TYPE_READ) {
if (zio->io_type == ZIO_TYPE_READ)
bp->bio_cmd = BIO_READ;
bp->bio_data =
abd_borrow_buf(zio->io_abd, zio->io_size);
} else {
else
bp->bio_cmd = BIO_WRITE;
bp->bio_data =
abd_borrow_buf_copy(zio->io_abd, zio->io_size);
/*
* If possible, present a scattered and/or gang ABD buffer to
* GEOM as an array of physical pages. This satisfies GEOM's
* requirement for a virtually contiguous buffer without copying.
*/
int pgs = vdev_geom_check_unmapped(zio, cp);
if (pgs > 0) {
bp->bio_ma = malloc(sizeof (struct vm_page *) * pgs,
M_DEVBUF, M_WAITOK);
bp->bio_ma_n = 0;
bp->bio_ma_offset = 0;
abd_iterate_func(zio->io_abd, 0, zio->io_size,
vdev_geom_fill_unmap_cb, bp);
bp->bio_data = unmapped_buf;
bp->bio_flags |= BIO_UNMAPPED;
} else {
if (zio->io_type == ZIO_TYPE_READ) {
bp->bio_data = abd_borrow_buf(zio->io_abd,
zio->io_size);
} else {
bp->bio_data = abd_borrow_buf_copy(zio->io_abd,
zio->io_size);
}
}
break;
case ZIO_TYPE_TRIM:
@ -1169,10 +1265,17 @@ vdev_geom_io_done(zio_t *zio)
return;
}
if (zio->io_type == ZIO_TYPE_READ)
abd_return_buf_copy(zio->io_abd, bp->bio_data, zio->io_size);
else
abd_return_buf(zio->io_abd, bp->bio_data, zio->io_size);
if (bp->bio_ma != NULL) {
free(bp->bio_ma, M_DEVBUF);
} else {
if (zio->io_type == ZIO_TYPE_READ) {
abd_return_buf_copy(zio->io_abd, bp->bio_data,
zio->io_size);
} else {
abd_return_buf(zio->io_abd, bp->bio_data,
zio->io_size);
}
}
g_destroy_bio(bp);
zio->io_bio = NULL;

View File

@ -241,28 +241,21 @@ zfs_file_fsync(zfs_file_t *fp, int flags)
return (zfs_vop_fsync(fp->f_vnode));
}
int
zfs_file_get(int fd, zfs_file_t **fpp)
zfs_file_t *
zfs_file_get(int fd)
{
struct file *fp;
if (fget(curthread, fd, &cap_no_rights, &fp))
return (SET_ERROR(EBADF));
return (NULL);
*fpp = fp;
return (0);
return (fp);
}
void
zfs_file_put(int fd)
zfs_file_put(zfs_file_t *fp)
{
struct file *fp;
/* No CAP_ rights required, as we're only releasing. */
if (fget(curthread, fd, &cap_no_rights, &fp) == 0) {
fdrop(fp, curthread);
fdrop(fp, curthread);
}
fdrop(fp, curthread);
}
loff_t

View File

@ -407,36 +407,22 @@ zfs_file_unlink(const char *path)
* Get reference to file pointer
*
* fd - input file descriptor
* fpp - pointer to file pointer
*
* Returns 0 on success EBADF on failure.
* Returns pointer to file struct or NULL
*/
int
zfs_file_get(int fd, zfs_file_t **fpp)
zfs_file_t *
zfs_file_get(int fd)
{
zfs_file_t *fp;
fp = fget(fd);
if (fp == NULL)
return (EBADF);
*fpp = fp;
return (0);
return (fget(fd));
}
/*
* Drop reference to file pointer
*
* fd - input file descriptor
* fp - input file struct pointer
*/
void
zfs_file_put(int fd)
zfs_file_put(zfs_file_t *fp)
{
struct file *fp;
if ((fp = fget(fd)) != NULL) {
fput(fp);
fput(fp);
}
fput(fp);
}

View File

@ -278,25 +278,29 @@ zfs_zevent_minor_to_state(minor_t minor, zfs_zevent_t **ze)
return (0);
}
int
zfs_file_t *
zfs_zevent_fd_hold(int fd, minor_t *minorp, zfs_zevent_t **ze)
{
int error;
zfs_file_t *fp = zfs_file_get(fd);
if (fp == NULL)
return (NULL);
error = zfsdev_getminor(fd, minorp);
int error = zfsdev_getminor(fp, minorp);
if (error == 0)
error = zfs_zevent_minor_to_state(*minorp, ze);
if (error)
zfs_zevent_fd_rele(fd);
if (error) {
zfs_zevent_fd_rele(fp);
fp = NULL;
}
return (error);
return (fp);
}
void
zfs_zevent_fd_rele(int fd)
zfs_zevent_fd_rele(zfs_file_t *fp)
{
zfs_file_put(fd);
zfs_file_put(fp);
}
/*

View File

@ -4861,8 +4861,8 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
*errors = fnvlist_alloc();
off = 0;
if ((error = zfs_file_get(input_fd, &input_fp)))
return (error);
if ((input_fp = zfs_file_get(input_fd)) == NULL)
return (SET_ERROR(EBADF));
noff = off = zfs_file_off(input_fp);
error = dmu_recv_begin(tofs, tosnap, begin_record, force,
@ -5142,7 +5142,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
nvlist_free(inheritprops);
}
out:
zfs_file_put(input_fd);
zfs_file_put(input_fp);
nvlist_free(origrecvd);
nvlist_free(origprops);
@ -5472,8 +5472,8 @@ zfs_ioc_send(zfs_cmd_t *zc)
zfs_file_t *fp;
dmu_send_outparams_t out = {0};
if ((error = zfs_file_get(zc->zc_cookie, &fp)))
return (error);
if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
out.dso_outfunc = dump_bytes;
@ -5483,7 +5483,7 @@ zfs_ioc_send(zfs_cmd_t *zc)
zc->zc_fromobj, embedok, large_block_ok, compressok,
rawok, savedok, zc->zc_cookie, &off, &out);
zfs_file_put(zc->zc_cookie);
zfs_file_put(fp);
}
return (error);
}
@ -6047,25 +6047,24 @@ zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
{
char *snap_name;
char *hold_name;
int error;
minor_t minor;
error = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
if (error != 0)
return (error);
zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
if (fp == NULL)
return (SET_ERROR(EBADF));
snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
(u_longlong_t)ddi_get_lbolt64());
hold_name = kmem_asprintf("%%%s", zc->zc_value);
error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
hold_name);
if (error == 0)
(void) strlcpy(zc->zc_value, snap_name,
sizeof (zc->zc_value));
kmem_strfree(snap_name);
kmem_strfree(hold_name);
zfs_onexit_fd_rele(zc->zc_cleanup_fd);
zfs_onexit_fd_rele(fp);
return (error);
}
@ -6085,13 +6084,13 @@ zfs_ioc_diff(zfs_cmd_t *zc)
offset_t off;
int error;
if ((error = zfs_file_get(zc->zc_cookie, &fp)))
return (error);
if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
zfs_file_put(zc->zc_cookie);
zfs_file_put(fp);
return (error);
}
@ -6127,6 +6126,7 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
int cleanup_fd = -1;
int error;
minor_t minor = 0;
zfs_file_t *fp = NULL;
holds = fnvlist_lookup_nvlist(args, "holds");
@ -6144,14 +6144,16 @@ zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
}
if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
error = zfs_onexit_fd_hold(cleanup_fd, &minor);
if (error != 0)
return (SET_ERROR(error));
fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
if (fp == NULL)
return (SET_ERROR(EBADF));
}
error = dsl_dataset_user_hold(holds, minor, errlist);
if (minor != 0)
zfs_onexit_fd_rele(cleanup_fd);
if (fp != NULL) {
ASSERT3U(minor, !=, 0);
zfs_onexit_fd_rele(fp);
}
return (SET_ERROR(error));
}
@ -6214,9 +6216,9 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
uint64_t dropped = 0;
int error;
error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
if (error != 0)
return (error);
zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
if (fp == NULL)
return (SET_ERROR(EBADF));
do {
error = zfs_zevent_next(ze, &event,
@ -6238,7 +6240,7 @@ zfs_ioc_events_next(zfs_cmd_t *zc)
break;
} while (1);
zfs_zevent_fd_rele(zc->zc_cleanup_fd);
zfs_zevent_fd_rele(fp);
return (error);
}
@ -6270,12 +6272,12 @@ zfs_ioc_events_seek(zfs_cmd_t *zc)
minor_t minor;
int error;
error = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
if (error != 0)
return (error);
zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
if (fp == NULL)
return (SET_ERROR(EBADF));
error = zfs_zevent_seek(ze, zc->zc_guid);
zfs_zevent_fd_rele(zc->zc_cleanup_fd);
zfs_zevent_fd_rele(fp);
return (error);
}
@ -6459,8 +6461,8 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
if ((error = zfs_file_get(fd, &fp)))
return (error);
if ((fp = zfs_file_get(fd)) == NULL)
return (SET_ERROR(EBADF));
off = zfs_file_off(fp);
@ -6472,7 +6474,7 @@ zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
compressok, rawok, savedok, resumeobj, resumeoff,
redactbook, fd, &off, &out);
zfs_file_put(fd);
zfs_file_put(fp);
return (error);
}
@ -7345,17 +7347,12 @@ pool_status_check(const char *name, zfs_ioc_namecheck_t type,
}
int
zfsdev_getminor(int fd, minor_t *minorp)
zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
{
zfsdev_state_t *zs, *fpd;
zfs_file_t *fp;
int rc;
ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
if ((rc = zfs_file_get(fd, &fp)))
return (rc);
fpd = zfs_file_private(fp);
if (fpd == NULL)
return (SET_ERROR(EBADF));

View File

@ -107,30 +107,33 @@ zfs_onexit_destroy(zfs_onexit_t *zo)
* of this function must call zfs_onexit_fd_rele() when they're finished
* using the minor number.
*/
int
zfs_file_t *
zfs_onexit_fd_hold(int fd, minor_t *minorp)
{
zfs_onexit_t *zo = NULL;
int error;
error = zfsdev_getminor(fd, minorp);
zfs_file_t *fp = zfs_file_get(fd);
if (fp == NULL)
return (NULL);
int error = zfsdev_getminor(fp, minorp);
if (error) {
zfs_onexit_fd_rele(fd);
return (error);
zfs_onexit_fd_rele(fp);
return (NULL);
}
zo = zfsdev_get_state(*minorp, ZST_ONEXIT);
if (zo == NULL) {
zfs_onexit_fd_rele(fd);
return (SET_ERROR(EBADF));
zfs_onexit_fd_rele(fp);
return (NULL);
}
return (0);
return (fp);
}
void
zfs_onexit_fd_rele(int fd)
zfs_onexit_fd_rele(zfs_file_t *fp)
{
zfs_file_put(fd);
zfs_file_put(fp);
}
static int

View File

@ -806,7 +806,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_gbdd11cbb9"
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g07a4c76e9"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -836,7 +836,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "FreeBSD_gbdd11cbb9"
#define ZFS_META_RELEASE "FreeBSD_g07a4c76e9"
/* Define the project version. */
#define ZFS_META_VERSION "2.1.99"