diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c index cd24acd40355..6f727711b7c8 100644 --- a/sys/fs/fuse/fuse_internal.c +++ b/sys/fs/fuse/fuse_internal.c @@ -908,7 +908,8 @@ fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) if (fuse_libabi_geq(data, 7, 5)) { if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) { - data->max_readahead = fiio->max_readahead; + data->max_readahead_blocks = fiio->max_readahead / + maxbcachebuf; data->max_write = fiio->max_write; if (fiio->flags & FUSE_ASYNC_READ) data->dataflags |= FSESS_ASYNC_READ; diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c index 60871175ada3..ab5e18508d05 100644 --- a/sys/fs/fuse/fuse_io.c +++ b/sys/fs/fuse/fuse_io.c @@ -321,10 +321,10 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag, /* Try clustered read */ long totread = uio->uio_resid + on; seqcount = MIN(seqcount, - data->max_readahead / biosize + 1); + data->max_readahead_blocks + 1); err = cluster_read(vp, filesize, lbn, bcount, NOCRED, totread, seqcount, 0, &bp); - } else if (seqcount > 1 && data->max_readahead >= nextsize) { + } else if (seqcount > 1 && data->max_readahead_blocks >= 1) { /* Try non-clustered readahead */ err = breadn(vp, lbn, bcount, &nextlbn, &nextsize, 1, NOCRED, &bp); diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c index 06dd83b7e08b..b577b5e5579b 100644 --- a/sys/fs/fuse/fuse_ipc.c +++ b/sys/fs/fuse/fuse_ipc.c @@ -711,6 +711,10 @@ fuse_body_audit(struct fuse_ticket *ftick, size_t blen) opcode = fticket_opcode(ftick); switch (opcode) { + case FUSE_BMAP: + err = (blen == sizeof(struct fuse_bmap_out)) ? 0 : EINVAL; + break; + case FUSE_LINK: case FUSE_LOOKUP: case FUSE_MKDIR: diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h index 440a4f671490..0f9b1180f362 100644 --- a/sys/fs/fuse/fuse_ipc.h +++ b/sys/fs/fuse/fuse_ipc.h @@ -197,7 +197,7 @@ struct fuse_data { uint32_t fuse_libabi_major; uint32_t fuse_libabi_minor; - uint32_t max_readahead; + uint32_t max_readahead_blocks; uint32_t max_write; uint32_t max_read; uint32_t subtype; diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c index b1d8b5837930..4fcc53290f92 100644 --- a/sys/fs/fuse/fuse_vnops.c +++ b/sys/fs/fuse/fuse_vnops.c @@ -120,6 +120,7 @@ SDT_PROBE_DEFINE2(fusefs, , vnops, trace, "int", "char*"); /* vnode ops */ static vop_access_t fuse_vnop_access; static vop_advlock_t fuse_vnop_advlock; +static vop_bmap_t fuse_vnop_bmap; static vop_close_t fuse_fifo_close; static vop_close_t fuse_vnop_close; static vop_create_t fuse_vnop_create; @@ -174,6 +175,7 @@ struct vop_vector fuse_vnops = { .vop_default = &default_vnodeops, .vop_access = fuse_vnop_access, .vop_advlock = fuse_vnop_advlock, + .vop_bmap = fuse_vnop_bmap, .vop_close = fuse_vnop_close, .vop_create = fuse_vnop_create, .vop_deleteextattr = fuse_vnop_deleteextattr, @@ -466,6 +468,92 @@ fuse_vnop_advlock(struct vop_advlock_args *ap) return err; } +/* { + struct vnode *a_vp; + daddr_t a_bn; + struct bufobj **a_bop; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; +} */ +static int +fuse_vnop_bmap(struct vop_bmap_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct bufobj **bo = ap->a_bop; + struct thread *td = curthread; + struct mount *mp; + struct fuse_dispatcher fdi; + struct fuse_bmap_in *fbi; + struct fuse_bmap_out *fbo; + struct fuse_data *data; + uint64_t biosize; + off_t filesize; + daddr_t lbn = ap->a_bn; + daddr_t *pbn = ap->a_bnp; + int *runp = ap->a_runp; + int *runb = ap->a_runb; + int error = 0; + int maxrun; + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + + mp = vnode_mount(vp); + data = fuse_get_mpdata(mp); + biosize = fuse_iosize(vp); + maxrun = MIN(vp->v_mount->mnt_iosize_max / biosize - 1, + data->max_readahead_blocks); + + if (bo != NULL) + *bo = &vp->v_bufobj; + + /* + * The FUSE_BMAP operation does not include the runp and runb + * variables, so we must guess. Report nonzero contiguous runs so + * cluster_read will combine adjacent reads. It's worthwhile to reduce + * upcalls even if we don't know the true physical layout of the file. + * + * FUSE file systems may opt out of read clustering in two ways: + * * mounting with -onoclusterr + * * Setting max_readahead <= maxbcachebuf during FUSE_INIT + */ + if (runb != NULL) + *runb = MIN(lbn, maxrun); + if (runp != NULL) { + error = fuse_vnode_size(vp, &filesize, td->td_ucred, td); + if (error == 0) + *runp = MIN(MAX(0, filesize / biosize - lbn - 1), + maxrun); + else + *runp = 0; + } + + if (fsess_isimpl(mp, FUSE_BMAP)) { + fdisp_init(&fdi, sizeof(*fbi)); + fdisp_make_vp(&fdi, FUSE_BMAP, vp, td, td->td_ucred); + fbi = fdi.indata; + fbi->block = lbn; + fbi->blocksize = biosize; + error = fdisp_wait_answ(&fdi); + if (error == ENOSYS) { + fsess_set_notimpl(mp, FUSE_BMAP); + error = 0; + } else { + fbo = fdi.answ; + if (error == 0 && pbn != NULL) + *pbn = fbo->block; + return error; + } + } + + /* If the daemon doesn't support BMAP, make up a sensible default */ + if (pbn != NULL) + *pbn = lbn * btodb(biosize); + return (error); +} + /* struct vop_close_args { struct vnode *a_vp; diff --git a/tests/sys/fs/fusefs/Makefile b/tests/sys/fs/fusefs/Makefile index 8aa07933f602..1a4045e8f2f0 100644 --- a/tests/sys/fs/fusefs/Makefile +++ b/tests/sys/fs/fusefs/Makefile @@ -9,6 +9,7 @@ TESTSDIR= ${TESTSBASE}/sys/fs/fusefs # out, so we get more granular reporting. GTESTS+= access GTESTS+= allow_other +GTESTS+= bmap GTESTS+= create GTESTS+= default_permissions GTESTS+= default_permissions_privileged diff --git a/tests/sys/fs/fusefs/bmap.cc b/tests/sys/fs/fusefs/bmap.cc new file mode 100644 index 000000000000..3ab345a45377 --- /dev/null +++ b/tests/sys/fs/fusefs/bmap.cc @@ -0,0 +1,159 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * + * Copyright (c) 2019 The FreeBSD Foundation + * + * This software was developed by BFF Storage Systems, LLC under sponsorship + * from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +extern "C" { +#include +#include +#include + +#include +} + +#include "mockfs.hh" +#include "utils.hh" + +using namespace testing; + +const static char FULLPATH[] = "mountpoint/foo"; +const static char RELPATH[] = "foo"; + +class Bmap: public FuseTest { +public: +virtual void SetUp() { + m_maxreadahead = UINT32_MAX; + FuseTest::SetUp(); +} +void expect_bmap(uint64_t ino, uint64_t lbn, uint32_t blocksize, uint64_t pbn) +{ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_BMAP && + in.header.nodeid == ino && + in.body.bmap.block == lbn && + in.body.bmap.blocksize == blocksize); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnImmediate([=](auto i __unused, auto& out) { + SET_OUT_HEADER_LEN(out, bmap); + out.body.bmap.block = pbn; + }))); +} + +void expect_lookup(const char *relpath, uint64_t ino, off_t size) +{ + FuseTest::expect_lookup(relpath, ino, S_IFREG | 0644, size, 1, + UINT64_MAX); +} +}; + +/* + * Test FUSE_BMAP + * XXX The FUSE protocol does not include the runp and runb variables, so those + * must be guessed in-kernel. + */ +TEST_F(Bmap, bmap) +{ + struct fiobmap2_arg arg; + const off_t filesize = 1 << 20; + const ino_t ino = 42; + int64_t lbn = 10; + int64_t pbn = 12345; + int fd; + + expect_lookup(RELPATH, 42, filesize); + expect_open(ino, 0, 1); + expect_bmap(ino, lbn, m_maxbcachebuf, pbn); + + fd = open(FULLPATH, O_RDWR); + ASSERT_LE(0, fd) << strerror(errno); + + arg.bn = lbn; + arg.runp = -1; + arg.runb = -1; + ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno); + EXPECT_EQ(arg.bn, pbn); + EXPECT_EQ(arg.runp, MAXPHYS / m_maxbcachebuf - 1); + EXPECT_EQ(arg.runb, MAXPHYS / m_maxbcachebuf - 1); +} + +/* + * If the daemon does not implement VOP_BMAP, fusefs should return sensible + * defaults. + */ +TEST_F(Bmap, default_) +{ + struct fiobmap2_arg arg; + const off_t filesize = 1 << 20; + const ino_t ino = 42; + int64_t lbn; + int fd; + + expect_lookup(RELPATH, 42, filesize); + expect_open(ino, 0, 1); + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_BMAP); + }, Eq(true)), + _) + ).WillOnce(Invoke(ReturnErrno(ENOSYS))); + + fd = open(FULLPATH, O_RDWR); + ASSERT_LE(0, fd) << strerror(errno); + + /* First block */ + lbn = 0; + arg.bn = lbn; + arg.runp = -1; + arg.runb = -1; + ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno); + EXPECT_EQ(arg.bn, 0); + EXPECT_EQ(arg.runp, MAXPHYS / m_maxbcachebuf - 1); + EXPECT_EQ(arg.runb, 0); + + /* In the middle */ + lbn = filesize / m_maxbcachebuf / 2; + arg.bn = lbn; + arg.runp = -1; + arg.runb = -1; + ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno); + EXPECT_EQ(arg.bn, lbn * m_maxbcachebuf / DEV_BSIZE); + EXPECT_EQ(arg.runp, MAXPHYS / m_maxbcachebuf - 1); + EXPECT_EQ(arg.runb, MAXPHYS / m_maxbcachebuf - 1); + + /* Last block */ + lbn = filesize / m_maxbcachebuf - 1; + arg.bn = lbn; + arg.runp = -1; + arg.runb = -1; + ASSERT_EQ(0, ioctl(fd, FIOBMAP2, &arg)) << strerror(errno); + EXPECT_EQ(arg.bn, lbn * m_maxbcachebuf / DEV_BSIZE); + EXPECT_EQ(arg.runp, 0); + EXPECT_EQ(arg.runb, MAXPHYS / m_maxbcachebuf - 1); +} diff --git a/tests/sys/fs/fusefs/mockfs.cc b/tests/sys/fs/fusefs/mockfs.cc index 893ff40607f5..7a9e8b66a5c4 100644 --- a/tests/sys/fs/fusefs/mockfs.cc +++ b/tests/sys/fs/fusefs/mockfs.cc @@ -168,6 +168,10 @@ void MockFS::debug_request(const mockfs_buf_in &in) case FUSE_ACCESS: printf(" mask=%#x", in.body.access.mask); break; + case FUSE_BMAP: + printf(" block=%#lx blocksize=%#x", in.body.bmap.block, + in.body.bmap.blocksize); + break; case FUSE_CREATE: if (m_kernel_minor_version >= 12) name = (const char*)in.body.bytes + diff --git a/tests/sys/fs/fusefs/mockfs.hh b/tests/sys/fs/fusefs/mockfs.hh index e37cd750ebde..162df6281932 100644 --- a/tests/sys/fs/fusefs/mockfs.hh +++ b/tests/sys/fs/fusefs/mockfs.hh @@ -124,6 +124,7 @@ struct fuse_create_out_7_8 { union fuse_payloads_in { fuse_access_in access; + fuse_bmap_in bmap; /* value is from fuse_kern_chan.c in fusefs-libs */ uint8_t bytes[0x21000 - sizeof(struct fuse_in_header)]; fuse_create_in create; @@ -164,6 +165,7 @@ struct mockfs_buf_in { union fuse_payloads_out { fuse_attr_out attr; fuse_attr_out_7_8 attr_7_8; + fuse_bmap_out bmap; fuse_create_out create; fuse_create_out_7_8 create_7_8; /* diff --git a/tests/sys/fs/fusefs/read.cc b/tests/sys/fs/fusefs/read.cc index 6146db93f5d2..a0b531ccba74 100644 --- a/tests/sys/fs/fusefs/read.cc +++ b/tests/sys/fs/fusefs/read.cc @@ -29,7 +29,7 @@ */ extern "C" { -#include +#include #include #include #include @@ -749,16 +749,15 @@ TEST_F(ReadCacheable, DISABLED_sendfile_eio) /* Deliberately leak fd. close(2) will be tested in release.cc */ } -/* Large reads should be clustered, even across cache block boundaries */ -/* - * Disabled because clustered reads requires VOP_BMAP, which fusefs does not - * yet support +/* + * Sequential reads should use readahead. And if allowed, large reads should + * be clustered. */ -TEST_P(ReadAhead, DISABLED_cluster) { +TEST_P(ReadAhead, readahead) { const char FULLPATH[] = "mountpoint/some_file.txt"; const char RELPATH[] = "some_file.txt"; uint64_t ino = 42; - int fd, maxcontig; + int fd, maxcontig, clustersize; ssize_t bufsize = 4 * m_maxbcachebuf; ssize_t filesize = bufsize; uint64_t len; @@ -774,8 +773,9 @@ TEST_P(ReadAhead, DISABLED_cluster) { expect_open(ino, 0, 1); maxcontig = m_noclusterr ? m_maxbcachebuf : m_maxbcachebuf + (int)get<1>(GetParam()); - for (offs = 0; offs < bufsize; offs += maxcontig) { - len = std::min((size_t)maxcontig, (size_t)(filesize - offs)); + clustersize = MIN(maxcontig, MAXPHYS); + for (offs = 0; offs < bufsize; offs += clustersize) { + len = std::min((size_t)clustersize, (size_t)(filesize - offs)); expect_read(ino, offs, len, len, contents + offs); } @@ -791,47 +791,11 @@ TEST_P(ReadAhead, DISABLED_cluster) { /* Deliberately leak fd. close(2) will be tested in release.cc */ } -/* fuse(4) should honor the filesystem's requested m_readahead parameter */ -TEST_P(ReadAhead, readahead) { - const char FULLPATH[] = "mountpoint/some_file.txt"; - const char RELPATH[] = "some_file.txt"; - uint64_t ino = 42; - int fd, i; - ssize_t bufsize = m_maxbcachebuf; - ssize_t filesize = m_maxbcachebuf * 6; - char *rbuf, *contents; - - contents = (char*)malloc(filesize); - ASSERT_NE(NULL, contents); - memset(contents, 'X', filesize); - rbuf = (char*)calloc(1, bufsize); - - expect_lookup(RELPATH, ino, filesize); - expect_open(ino, 0, 1); - /* fuse(4) should only read ahead the allowed amount */ - expect_read(ino, 0, m_maxbcachebuf, m_maxbcachebuf, contents); - for (i = 0; i < (int)get<1>(GetParam()) / m_maxbcachebuf; i++) { - off_t offs = (i + 1) * m_maxbcachebuf; - expect_read(ino, offs, m_maxbcachebuf, m_maxbcachebuf, - contents + offs); - } - - fd = open(FULLPATH, O_RDONLY); - ASSERT_LE(0, fd) << strerror(errno); - - /* Set the internal readahead counter to a "large" value */ - ASSERT_EQ(0, fcntl(fd, F_READAHEAD, 1'000'000'000)) << strerror(errno); - - ASSERT_EQ(bufsize, read(fd, rbuf, bufsize)) << strerror(errno); - ASSERT_EQ(0, memcmp(rbuf, contents, bufsize)); - - /* Deliberately leak fd. close(2) will be tested in release.cc */ -} - INSTANTIATE_TEST_CASE_P(RA, ReadAhead, Values(tuple(false, 0u), tuple(false, 0x10000), tuple(false, 0x20000), tuple(false, 0x30000), tuple(true, 0u), - tuple(true, 0x10000))); + tuple(true, 0x10000), + tuple(true, 0x20000))); diff --git a/tests/sys/fs/fusefs/utils.cc b/tests/sys/fs/fusefs/utils.cc index 1b1fdb013bc6..cd8b8be40259 100644 --- a/tests/sys/fs/fusefs/utils.cc +++ b/tests/sys/fs/fusefs/utils.cc @@ -129,6 +129,20 @@ void FuseTest::SetUp() { _) ).Times(AnyNumber()) .WillRepeatedly(Invoke(ReturnErrno(ENOSYS))); + /* + * FUSE_BMAP is called for most test cases that read data. Set + * a default expectation and return ENOSYS. + * + * Individual test cases can override this expectation since + * googlemock evaluates expectations in LIFO order. + */ + EXPECT_CALL(*m_mock, process( + ResultOf([=](auto in) { + return (in.header.opcode == FUSE_BMAP); + }, Eq(true)), + _) + ).Times(AnyNumber()) + .WillRepeatedly(Invoke(ReturnErrno(ENOSYS))); } catch (std::system_error err) { FAIL() << err.what(); }