fusefs: enable write clustering
Enable write clustering in fusefs whenever cache mode is set to writeback and the "async" mount option is used. With default values for MAXPHYS, DFLTPHYS, and the fuse max_write mount parameter, that means sequential writes will now be written 128KB at a time instead of 64KB. Also, add a regression test for PR 238565, a panic during unmount that probably affects UFS, ext2, and msdosfs as well as fusefs. PR: 238565 Sponsored by: The FreeBSD Foundation
This commit is contained in:
parent
dff3a6b410
commit
8eecd9ce05
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/projects/fuse2/; revision=349036
@ -29,7 +29,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd May 19, 2019
|
||||
.Dd June 14, 2019
|
||||
.Dt MOUNT_FUSEFS 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -136,21 +136,24 @@ The following options are available (and also their negated versions,
|
||||
by prefixing them with
|
||||
.Dq no ) :
|
||||
.Bl -tag -width indent
|
||||
.It Cm default_permissions
|
||||
Enable traditional (file mode based) permission checking in kernel
|
||||
.It Cm allow_other
|
||||
Do not apply
|
||||
.Sx STRICT ACCESS POLICY .
|
||||
Only root can use this option
|
||||
.It Cm async
|
||||
I/O to the file system may be done asynchronously.
|
||||
Writes may delayed and/or reordered.
|
||||
.It Cm default_permissions
|
||||
Enable traditional (file mode based) permission checking in kernel
|
||||
.It Cm max_read Ns = Ns Ar n
|
||||
Limit size of read requests to
|
||||
.Ar n
|
||||
.It Cm neglect_shares
|
||||
Do not refuse unmounting if there are secondary mounts
|
||||
.It Cm private
|
||||
Refuse shared mounting of the daemon.
|
||||
This is the default behaviour, to allow sharing, expicitly use
|
||||
.Fl o Cm noprivate
|
||||
.It Cm neglect_shares
|
||||
Do not refuse unmounting if there are secondary mounts
|
||||
.It Cm push_symlinks_in
|
||||
Prefix absolute symlinks with the mountpoint
|
||||
.It Cm subtype Ns = Ns Ar fsname
|
||||
|
@ -88,6 +88,8 @@ static struct mntopt mopts[] = {
|
||||
{ "large_read", 0, 0x00, 1 },
|
||||
/* "nonempty", just the first two chars are stripped off during parsing */
|
||||
{ "nempty", 0, 0x00, 1 },
|
||||
{ "async", 0, MNT_ASYNC, 0},
|
||||
{ "noasync", 1, MNT_ASYNC, 0},
|
||||
MOPT_STDOPTS,
|
||||
MOPT_END
|
||||
};
|
||||
|
@ -565,11 +565,13 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
|
||||
daddr_t lbn;
|
||||
off_t filesize;
|
||||
int bcount;
|
||||
int n, on, err = 0;
|
||||
int n, on, seqcount, err = 0;
|
||||
bool last_page;
|
||||
|
||||
const int biosize = fuse_iosize(vp);
|
||||
|
||||
seqcount = ioflag >> IO_SEQSHIFT;
|
||||
|
||||
KASSERT(uio->uio_rw == UIO_WRITE, ("fuse_write_biobackend mode"));
|
||||
if (vp->v_type != VREG)
|
||||
return (EIO);
|
||||
@ -644,6 +646,7 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
|
||||
* with other readers
|
||||
*/
|
||||
err = fuse_vnode_setsize(vp, uio->uio_offset + n);
|
||||
filesize = uio->uio_offset + n;
|
||||
fvdat->flag |= FN_SIZECHANGE;
|
||||
if (err) {
|
||||
brelse(bp);
|
||||
@ -787,20 +790,22 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
|
||||
} else if (vm_page_count_severe() ||
|
||||
buf_dirty_count_severe() ||
|
||||
(ioflag & IO_ASYNC)) {
|
||||
/* TODO: enable write clustering later */
|
||||
bp->b_flags |= B_CLUSTEROK;
|
||||
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 3, bp);
|
||||
bawrite(bp);
|
||||
} else if (on == 0 && n == bcount) {
|
||||
if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) {
|
||||
bp->b_flags |= B_CLUSTEROK;
|
||||
SDT_PROBE2(fusefs, , io, write_biobackend_issue,
|
||||
4, bp);
|
||||
bdwrite(bp);
|
||||
cluster_write(vp, bp, filesize, seqcount, 0);
|
||||
} else {
|
||||
SDT_PROBE2(fusefs, , io, write_biobackend_issue,
|
||||
5, bp);
|
||||
bawrite(bp);
|
||||
}
|
||||
} else if (ioflag & IO_DIRECT) {
|
||||
bp->b_flags |= B_CLUSTEROK;
|
||||
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 6, bp);
|
||||
bawrite(bp);
|
||||
} else {
|
||||
|
@ -433,6 +433,7 @@ fuse_vfsop_mount(struct mount *mp)
|
||||
}
|
||||
copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &len);
|
||||
bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len);
|
||||
mp->mnt_iosize_max = MAXPHYS;
|
||||
|
||||
/* Now handshaking with daemon */
|
||||
fuse_internal_send_init(data, td);
|
||||
|
@ -336,7 +336,7 @@ void MockFS::debug_response(const mockfs_buf_out &out) {
|
||||
|
||||
MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
|
||||
bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags,
|
||||
uint32_t kernel_minor_version)
|
||||
uint32_t kernel_minor_version, uint32_t max_write, bool async)
|
||||
{
|
||||
struct sigaction sa;
|
||||
struct iovec *iov = NULL;
|
||||
@ -347,6 +347,7 @@ MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
|
||||
m_daemon_id = NULL;
|
||||
m_kernel_minor_version = kernel_minor_version;
|
||||
m_maxreadahead = max_readahead;
|
||||
m_maxwrite = max_write;
|
||||
m_nready = -1;
|
||||
m_pm = pm;
|
||||
m_quit = false;
|
||||
@ -404,6 +405,10 @@ MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
|
||||
build_iovec(&iov, &iovlen, "ro",
|
||||
__DECONST(void*, &trueval), sizeof(bool));
|
||||
}
|
||||
if (async) {
|
||||
build_iovec(&iov, &iovlen, "async", __DECONST(void*, &trueval),
|
||||
sizeof(bool));
|
||||
}
|
||||
if (nmount(iov, iovlen, 0))
|
||||
throw(std::system_error(errno, std::system_category(),
|
||||
"Couldn't mount filesystem"));
|
||||
@ -449,15 +454,7 @@ void MockFS::init(uint32_t flags) {
|
||||
out->body.init.minor = m_kernel_minor_version;;
|
||||
out->body.init.flags = in->body.init.flags & flags;
|
||||
|
||||
/*
|
||||
* The default max_write is set to this formula in libfuse, though
|
||||
* individual filesystems can lower it. The "- 4096" was added in
|
||||
* commit 154ffe2, with the commit message "fix".
|
||||
*/
|
||||
uint32_t default_max_write = 32 * getpagesize() + 0x1000 - 4096;
|
||||
/* For testing purposes, it should be distinct from MAXPHYS */
|
||||
m_max_write = MIN(default_max_write, MAXPHYS / 2);
|
||||
out->body.init.max_write = m_max_write;
|
||||
out->body.init.max_write = m_maxwrite;
|
||||
|
||||
out->body.init.max_readahead = m_maxreadahead;
|
||||
SET_OUT_HEADER_LEN(*out, init);
|
||||
|
@ -252,7 +252,7 @@ class MockFS {
|
||||
|
||||
int m_kq;
|
||||
|
||||
/* The max_readahead filesystem option */
|
||||
/* The max_readahead file system option */
|
||||
uint32_t m_maxreadahead;
|
||||
|
||||
/* pid of the test process */
|
||||
@ -288,7 +288,7 @@ class MockFS {
|
||||
pid_t m_child_pid;
|
||||
|
||||
/* Maximum size of a FUSE_WRITE write */
|
||||
uint32_t m_max_write;
|
||||
uint32_t m_maxwrite;
|
||||
|
||||
/*
|
||||
* Number of events that were available from /dev/fuse after the last
|
||||
@ -303,7 +303,7 @@ class MockFS {
|
||||
MockFS(int max_readahead, bool allow_other,
|
||||
bool default_permissions, bool push_symlinks_in, bool ro,
|
||||
enum poll_method pm, uint32_t flags,
|
||||
uint32_t kernel_minor_version);
|
||||
uint32_t kernel_minor_version, uint32_t max_write, bool async);
|
||||
|
||||
virtual ~MockFS();
|
||||
|
||||
|
@ -50,6 +50,20 @@ extern "C" {
|
||||
|
||||
using namespace testing;
|
||||
|
||||
/*
|
||||
* The default max_write is set to this formula in libfuse, though
|
||||
* individual filesystems can lower it. The "- 4096" was added in
|
||||
* commit 154ffe2, with the commit message "fix".
|
||||
*/
|
||||
const uint32_t libfuse_max_write = 32 * getpagesize() + 0x1000 - 4096;
|
||||
|
||||
/*
|
||||
* Set the default max_write to a distinct value from MAXPHYS to catch bugs
|
||||
* that confuse the two.
|
||||
*/
|
||||
const uint32_t default_max_write = MIN(libfuse_max_write, MAXPHYS / 2);
|
||||
|
||||
|
||||
/* Check that fusefs(4) is accessible and the current user can mount(2) */
|
||||
void check_environment()
|
||||
{
|
||||
@ -98,7 +112,8 @@ void FuseTest::SetUp() {
|
||||
try {
|
||||
m_mock = new MockFS(m_maxreadahead, m_allow_other,
|
||||
m_default_permissions, m_push_symlinks_in, m_ro,
|
||||
m_pm, m_init_flags, m_kernel_minor_version);
|
||||
m_pm, m_init_flags, m_kernel_minor_version,
|
||||
m_maxwrite, m_async);
|
||||
/*
|
||||
* FUSE_ACCESS is called almost universally. Expecting it in
|
||||
* each test case would be super-annoying. Instead, set a
|
||||
|
@ -40,9 +40,12 @@ inline void nap()
|
||||
usleep(NAP_NS / 1000);
|
||||
}
|
||||
|
||||
extern const uint32_t libfuse_max_write;
|
||||
extern const uint32_t default_max_write;
|
||||
class FuseTest : public ::testing::Test {
|
||||
protected:
|
||||
uint32_t m_maxreadahead;
|
||||
uint32_t m_maxwrite;
|
||||
uint32_t m_init_flags;
|
||||
bool m_allow_other;
|
||||
bool m_default_permissions;
|
||||
@ -50,6 +53,7 @@ class FuseTest : public ::testing::Test {
|
||||
enum poll_method m_pm;
|
||||
bool m_push_symlinks_in;
|
||||
bool m_ro;
|
||||
bool m_async;
|
||||
MockFS *m_mock = NULL;
|
||||
const static uint64_t FH = 0xdeadbeef1a7ebabe;
|
||||
|
||||
@ -62,13 +66,15 @@ class FuseTest : public ::testing::Test {
|
||||
* be lowered
|
||||
*/
|
||||
m_maxreadahead(UINT_MAX),
|
||||
m_maxwrite(default_max_write),
|
||||
m_init_flags(0),
|
||||
m_allow_other(false),
|
||||
m_default_permissions(false),
|
||||
m_kernel_minor_version(FUSE_KERNEL_MINOR_VERSION),
|
||||
m_pm(BLOCKING),
|
||||
m_push_symlinks_in(false),
|
||||
m_ro(false)
|
||||
m_ro(false),
|
||||
m_async(false)
|
||||
{}
|
||||
|
||||
virtual void SetUp();
|
||||
|
@ -29,7 +29,7 @@
|
||||
*/
|
||||
|
||||
extern "C" {
|
||||
#include <sys/types.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/stat.h>
|
||||
@ -179,6 +179,19 @@ void expect_write(uint64_t ino, uint64_t offset, uint64_t isize,
|
||||
}
|
||||
};
|
||||
|
||||
/* Tests for clustered writes with WriteBack cacheing */
|
||||
class WriteCluster: public WriteBack {
|
||||
public:
|
||||
virtual void SetUp() {
|
||||
if (MAXPHYS < 2 * DFLTPHYS)
|
||||
GTEST_SKIP() << "MAXPHYS must be at least twice DFLTPHYS"
|
||||
<< "for this test";
|
||||
m_async = true;
|
||||
m_maxwrite = MAXPHYS;
|
||||
WriteBack::SetUp();
|
||||
}
|
||||
};
|
||||
|
||||
void sigxfsz_handler(int __unused sig) {
|
||||
Write::s_sigxfsz = 1;
|
||||
}
|
||||
@ -617,7 +630,7 @@ TEST_F(Write, write_large)
|
||||
int fd;
|
||||
ssize_t halfbufsize, bufsize;
|
||||
|
||||
halfbufsize = m_mock->m_max_write;
|
||||
halfbufsize = m_mock->m_maxwrite;
|
||||
bufsize = halfbufsize * 2;
|
||||
contents = (int*)malloc(bufsize);
|
||||
ASSERT_NE(NULL, contents);
|
||||
@ -711,6 +724,89 @@ TEST_F(WriteBack, close)
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/* In writeback mode, adjacent writes will be clustered together */
|
||||
TEST_F(WriteCluster, clustering)
|
||||
{
|
||||
const char FULLPATH[] = "mountpoint/some_file.txt";
|
||||
const char RELPATH[] = "some_file.txt";
|
||||
uint64_t ino = 42;
|
||||
int i, fd;
|
||||
void *wbuf, *wbuf2x;
|
||||
ssize_t bufsize = 65536;
|
||||
off_t filesize = 327680;
|
||||
|
||||
wbuf = malloc(bufsize);
|
||||
ASSERT_NE(NULL, wbuf) << strerror(errno);
|
||||
memset(wbuf, 'X', bufsize);
|
||||
wbuf2x = malloc(2 * bufsize);
|
||||
ASSERT_NE(NULL, wbuf2x) << strerror(errno);
|
||||
memset(wbuf2x, 'X', 2 * bufsize);
|
||||
|
||||
expect_lookup(RELPATH, ino, filesize);
|
||||
expect_open(ino, 0, 1);
|
||||
/*
|
||||
* Writes of bufsize-bytes each should be clustered into greater sizes.
|
||||
* The amount of clustering is adaptive, so the first write actually
|
||||
* issued will be 2x bufsize and subsequent writes may be larger
|
||||
*/
|
||||
expect_write(ino, 0, 2 * bufsize, 2 * bufsize, wbuf2x);
|
||||
expect_write(ino, 2 * bufsize, 2 * bufsize, 2 * bufsize, wbuf2x);
|
||||
expect_flush(ino, 1, ReturnErrno(0));
|
||||
expect_release(ino, ReturnErrno(0));
|
||||
|
||||
fd = open(FULLPATH, O_RDWR);
|
||||
ASSERT_LE(0, fd) << strerror(errno);
|
||||
|
||||
for (i = 0; i < 4; i++) {
|
||||
ASSERT_EQ(bufsize, write(fd, wbuf, bufsize))
|
||||
<< strerror(errno);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* When clustering writes, an I/O error to any of the cluster's children should
|
||||
* not panic the system on unmount
|
||||
*/
|
||||
/*
|
||||
* Disabled because it panics.
|
||||
* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=238565
|
||||
*/
|
||||
TEST_F(WriteCluster, DISABLED_cluster_write_err)
|
||||
{
|
||||
const char FULLPATH[] = "mountpoint/some_file.txt";
|
||||
const char RELPATH[] = "some_file.txt";
|
||||
uint64_t ino = 42;
|
||||
int i, fd;
|
||||
void *wbuf;
|
||||
ssize_t bufsize = 65536;
|
||||
off_t filesize = 262144;
|
||||
|
||||
wbuf = malloc(bufsize);
|
||||
ASSERT_NE(NULL, wbuf) << strerror(errno);
|
||||
memset(wbuf, 'X', bufsize);
|
||||
|
||||
expect_lookup(RELPATH, ino, filesize);
|
||||
expect_open(ino, 0, 1);
|
||||
EXPECT_CALL(*m_mock, process(
|
||||
ResultOf([=](auto in) {
|
||||
return (in.header.opcode == FUSE_WRITE);
|
||||
}, Eq(true)),
|
||||
_)
|
||||
).WillRepeatedly(Invoke(ReturnErrno(EIO)));
|
||||
expect_flush(ino, 1, ReturnErrno(0));
|
||||
expect_release(ino, ReturnErrno(0));
|
||||
|
||||
fd = open(FULLPATH, O_RDWR);
|
||||
ASSERT_LE(0, fd) << strerror(errno);
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
ASSERT_EQ(bufsize, write(fd, wbuf, bufsize))
|
||||
<< strerror(errno);
|
||||
}
|
||||
close(fd);
|
||||
}
|
||||
|
||||
/*
|
||||
* In writeback mode, writes to an O_WRONLY file could trigger reads from the
|
||||
* server. The FUSE protocol explicitly allows that.
|
||||
|
Loading…
Reference in New Issue
Block a user