diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c
index 97c10b2fdd28..71d0006219ad 100644
--- a/sys/fs/fuse/fuse_device.c
+++ b/sys/fs/fuse/fuse_device.c
@@ -81,6 +81,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/selinfo.h>
 
 #include "fuse.h"
+#include "fuse_internal.h"
 #include "fuse_ipc.h"
 
 SDT_PROVIDER_DECLARE(fusefs);
@@ -393,17 +394,17 @@ fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio)
 		    "differs from size claimed by header");
 		return (EINVAL);
 	}
-	if (uio->uio_resid && ohead->error) {
+	if (uio->uio_resid && ohead->unique != 0 && ohead->error) {
 		SDT_PROBE2(fusefs, , device, trace, 1,
 		    "Format error: non zero error but message had a body");
 		return (EINVAL);
 	}
-	/* Sanitize the linuxism of negative errnos */
-	ohead->error = -(ohead->error);
 
 	return (0);
 }
 
+SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_notify,
+	"struct fuse_out_header*");
 SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_missing_ticket,
 	"uint64_t");
 SDT_PROBE_DEFINE1(fusefs, , device, fuse_device_write_found,
@@ -420,12 +421,14 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 	struct fuse_out_header ohead;
 	int err = 0;
 	struct fuse_data *data;
+	struct mount *mp;
 	struct fuse_ticket *tick, *itick, *x_tick;
 	int found = 0;
 
 	err = devfs_get_cdevpriv((void **)&data);
 	if (err != 0)
 		return (err);
+	mp = data->mp;
 
 	if (uio->uio_resid < sizeof(struct fuse_out_header)) {
 		SDT_PROBE2(fusefs, , device, trace, 1,
@@ -489,6 +492,8 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 			 */
 			SDT_PROBE2(fusefs, , device, trace, 1,
 				"pass ticket to a callback");
+			/* Sanitize the linuxism of negative errnos */
+			ohead.error *= -1;
 			memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead));
 			err = tick->tk_aw_handler(tick, uio);
 		} else {
@@ -503,11 +508,24 @@ fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag)
 		 * because fuse_ticket_drop() will deal with refcount anyway.
 		 */
 		fuse_ticket_drop(tick);
+	} else if (ohead.unique == 0) {
+		/* unique == 0 means asynchronous notification */
+		SDT_PROBE1(fusefs, , device, fuse_device_write_notify, &ohead);
+		switch (ohead.error) {
+		case FUSE_NOTIFY_INVAL_ENTRY:
+			err = fuse_internal_invalidate_entry(mp, uio);
+			break;
+		case FUSE_NOTIFY_POLL:
+		case FUSE_NOTIFY_INVAL_INODE:
+		default:
+			/* Not implemented */
+			err = ENOSYS;
+		}
 	} else {
 		/* no callback at all! */
 		SDT_PROBE1(fusefs, , device, fuse_device_write_missing_ticket,
 			ohead.unique);
-		if (ohead.error == EAGAIN) {
+		if (ohead.error == -EAGAIN) {
 			/*
 			 * This was probably a response to a FUSE_INTERRUPT
 			 * operation whose original operation is already
diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c
index 1c00a8164d5a..7fa02fd4468d 100644
--- a/sys/fs/fuse/fuse_internal.c
+++ b/sys/fs/fuse/fuse_internal.c
@@ -326,6 +326,89 @@ fuse_internal_fsync(struct vnode *vp,
 	return err;
 }
 
+/* Asynchronous invalidation */
+SDT_PROBE_DEFINE1(fusefs, , internal, invalidate_without_export,
+	"struct mount*");
+SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_cache_hit,
+	"struct vnode*", "struct vnode*");
+/*
+ * Handle a FUSE_NOTIFY_INVAL_ENTRY message: purge one name from the name
+ * cache and clear the parent directory's cached attributes.
+ *
+ * mp	mount that the notification was written to
+ * uio	unread remainder of the message: a fuse_notify_inval_entry_out
+ *	followed by namelen bytes of entry name
+ *
+ * Returns 0 on success, EINVAL if the mount lacks export support or the
+ * name does not fit in name[], otherwise the error from uiomove, VFS_ROOT
+ * or VFS_VGET.
+ */
+int
+fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
+{
+	struct fuse_notify_inval_entry_out fnieo;
+	struct fuse_data *data = fuse_get_mpdata(mp);
+	struct componentname cn;
+	struct vnode *dvp, *vp;
+	char name[PATH_MAX];
+	int err;
+
+	if (!(data->dataflags & FSESS_EXPORT_SUPPORT)) {
+		/*
+		 * Linux allows file systems without export support to use
+		 * asynchronous notification because its inode cache is indexed
+		 * purely by the inode number.  But FreeBSD's vnode cache
+		 * requires access to the entire vnode structure.
+		 */
+		SDT_PROBE1(fusefs, , internal, invalidate_without_export, mp);
+		return (EINVAL);
+	}
+
+	if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
+		return (err);
+
+	/*
+	 * namelen comes straight from the daemon.  Reject anything that
+	 * would overrun name[] or leave no room for the terminating NUL.
+	 */
+	if (fnieo.namelen >= sizeof(name))
+		return (EINVAL);
+
+	if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
+		return (err);
+	name[fnieo.namelen] = '\0';
+	/* fusefs does not cache "." or ".." entries */
+	if (strncmp(name, ".", sizeof(".")) == 0 ||
+	    strncmp(name, "..", sizeof("..")) == 0)
+		return (0);
+
+	if (fnieo.parent == FUSE_ROOT_ID)
+		err = VFS_ROOT(mp, LK_SHARED, &dvp);
+	else
+		err = VFS_VGET(mp, fnieo.parent, LK_SHARED, &dvp);
+	if (err != 0)
+		return (err);
+	/*
+	 * XXX we can't check dvp's generation because the FUSE invalidate
+	 * entry message doesn't include it.  Worst case is that we invalidate
+	 * an entry that didn't need to be invalidated.
+	 */
+
+	cn.cn_nameiop = LOOKUP;
+	cn.cn_flags = 0;	/* !MAKEENTRY means free cached entry */
+	cn.cn_thread = curthread;
+	cn.cn_cred = curthread->td_ucred;
+	cn.cn_lkflags = LK_SHARED;
+	cn.cn_pnbuf = NULL;
+	cn.cn_nameptr = name;
+	cn.cn_namelen = fnieo.namelen;
+	err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
+	MPASS(err == 0);
+	fuse_vnode_clear_attr_cache(dvp);
+	vput(dvp);
+	return (0);
+}
+
 /* mknod */
 int
 fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h
index a963c79cc1d8..77bdfe301b1e 100644
--- a/sys/fs/fuse/fuse_internal.h
+++ b/sys/fs/fuse/fuse_internal.h
@@ -228,17 +228,17 @@ int fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
 int fuse_internal_getattr(struct vnode *vp, struct vattr *vap,
 	struct ucred *cred, struct thread *td);
 
-/* readdir */
-
-struct pseudo_dirent {
-	uint32_t d_namlen;
-};
+/* asynchronous invalidation */
+int fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio);
 
 /* mknod */
 int fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
 	struct componentname *cnp, struct vattr *vap);
 
 /* readdir */
+struct pseudo_dirent {
+	uint32_t d_namlen;
+};
 int
 fuse_internal_readdir(struct vnode *vp, struct uio *uio, off_t startoff,
 	struct fuse_filehandle *fufh, struct fuse_iov *cookediov, int *ncookies, u_long *cookies);
diff --git a/tests/sys/fs/fusefs/Makefile b/tests/sys/fs/fusefs/Makefile
index 8c2b5ffa6316..464abe566638 100644
--- a/tests/sys/fs/fusefs/Makefile
+++ b/tests/sys/fs/fusefs/Makefile
@@ -27,6 +27,7 @@ GTESTS+=	mkdir
 GTESTS+=	mknod
 GTESTS+=	mount
 GTESTS+=	nfs
+GTESTS+=	notify
 GTESTS+=	open
 GTESTS+=	opendir
 GTESTS+=	read
diff --git a/tests/sys/fs/fusefs/mockfs.cc b/tests/sys/fs/fusefs/mockfs.cc
index a6551e7160d2..2e255bf4877e 100644
--- a/tests/sys/fs/fusefs/mockfs.cc
+++ b/tests/sys/fs/fusefs/mockfs.cc
@@ -153,7 +153,7 @@ void sigint_handler(int __unused sig) {
 	// Don't do anything except interrupt the daemon's read(2) call
 }
 
-void MockFS::debug_fuseop(const mockfs_buf_in &in)
+void MockFS::debug_request(const mockfs_buf_in &in)
 {
 	printf("%-11s ino=%2" PRIu64, opcode2opname(in.header.opcode),
 		in.header.nodeid);
@@ -303,6 +303,30 @@ void MockFS::debug_fuseop(const mockfs_buf_in &in)
 	printf("\n");
 }
 
+/*
+ * Debug a FUSE response.
+ *
+ * This is mostly useful for asynchronous notifications, which don't correspond
+ * to any request
+ */
+void MockFS::debug_response(const mockfs_buf_out &out) {
+	const char *name;
+
+	if (verbosity == 0)
+		return;
+
+	switch (out.header.error) {
+	case FUSE_NOTIFY_INVAL_ENTRY:
+		name = (const char*)out.body.bytes +
+			sizeof(fuse_notify_inval_entry_out);
+		printf("<- INVAL_ENTRY parent=%" PRIu64 " %s\n",
+			out.body.inval_entry.parent, name);
+		break;
+	default:
+		break;
+	}
+}
+
 MockFS::MockFS(int max_readahead, bool allow_other, bool default_permissions,
 	bool push_symlinks_in, bool ro, enum poll_method pm, uint32_t flags,
 	uint32_t kernel_minor_version)
@@ -455,7 +479,7 @@ void MockFS::loop() {
 		if (m_quit)
 			break;
 		if (verbosity > 0)
-			debug_fuseop(*in);
+			debug_request(*in);
 		if (pid_ok((pid_t)in->header.pid)) {
 			process(*in, out);
 		} else {
@@ -475,6 +499,23 @@ void MockFS::loop() {
 	}
 }
 
+int MockFS::notify_inval_entry(ino_t parent, const char *name, size_t namelen)
+{
+	std::unique_ptr<mockfs_buf_out> out(new mockfs_buf_out);
+
+	out->header.unique = 0;	/* 0 means asynchronous notification */
+	out->header.error = FUSE_NOTIFY_INVAL_ENTRY;
+	out->body.inval_entry.parent = parent;
+	out->body.inval_entry.namelen = namelen;
+	strlcpy((char*)&out->body.bytes + sizeof(out->body.inval_entry),
+		name, sizeof(out->body.bytes) - sizeof(out->body.inval_entry));
+	out->header.len = sizeof(out->header) + sizeof(out->body.inval_entry) +
+		namelen;
+	debug_response(*out);
+	write_response(*out);
+	return 0;
+}
+
 bool MockFS::pid_ok(pid_t pid) {
 	if (pid == m_pid) {
 		return (true);
diff --git a/tests/sys/fs/fusefs/mockfs.hh b/tests/sys/fs/fusefs/mockfs.hh
index 6943a0ecafbf..fbbbc5a8b04c 100644
--- a/tests/sys/fs/fusefs/mockfs.hh
+++ b/tests/sys/fs/fusefs/mockfs.hh
@@ -176,6 +176,8 @@ union fuse_payloads_out {
 	fuse_lk_out		getlk;
 	fuse_getxattr_out	getxattr;
 	fuse_init_out		init;
+	/* The inval_entry structure should be followed by the entry's name */
+	fuse_notify_inval_entry_out	inval_entry;
 	fuse_listxattr_out	listxattr;
 	fuse_open_out		open;
 	fuse_statfs_out		statfs;
@@ -258,7 +260,8 @@ class MockFS {
 	/* Method the daemon should use for I/O to and from /dev/fuse */
 	enum poll_method m_pm;
 
-	void debug_fuseop(const mockfs_buf_in&);
+	void debug_request(const mockfs_buf_in&);
+	void debug_response(const mockfs_buf_out&);
 
 	/* Initialize a session after mounting */
 	void init(uint32_t flags);
@@ -309,6 +312,19 @@ class MockFS {
 	/* Process FUSE requests endlessly */
 	void loop();
 
+	/*
+	 * Send an asynchronous notification to invalidate a directory entry.
+	 * Similar to libfuse's fuse_lowlevel_notify_inval_entry
+	 *
+	 * This method will block until the client has responded, so it should
+	 * generally be run in a separate thread from request processing.
+	 *
+	 * @param	parent	Parent directory's inode number
+	 * @param	name	name of dirent to invalidate
+	 * @param	namelen	length of name, not counting the NUL
+	 */
+	int notify_inval_entry(ino_t parent, const char *name, size_t namelen);
+
 	/*
 	 * Request handler
 	 *
diff --git a/tests/sys/fs/fusefs/notify.cc b/tests/sys/fs/fusefs/notify.cc
new file mode 100644
index 000000000000..5e96b304b72c
--- /dev/null
+++ b/tests/sys/fs/fusefs/notify.cc
@@ -0,0 +1,239 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2019 The FreeBSD Foundation
+ *
+ * This software was developed by BFF Storage Systems, LLC under sponsorship
+ * from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+extern "C" {
+#include <pthread.h>
+}
+
+#include "mockfs.hh"
+#include "utils.hh"
+
+using namespace testing;
+
+/*
+ * FUSE asynchronous notification
+ *
+ * FUSE servers can send unprompted notification messages for things like cache
+ * invalidation.  This file tests our client's handling of those messages.
+ */
+
+class Notify: public FuseTest {
+public:
+virtual void SetUp() {
+	m_init_flags = FUSE_EXPORT_SUPPORT;
+	FuseTest::SetUp();
+}
+
+void expect_lookup(uint64_t parent, const char *relpath, uint64_t ino,
+	Sequence &seq)
+{
+	EXPECT_LOOKUP(parent, relpath)
+	.InSequence(seq)
+	.WillOnce(Invoke(
+		ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, entry);
+		out.body.entry.attr.mode = S_IFREG | 0644;
+		out.body.entry.nodeid = ino;
+		out.body.entry.attr.ino = ino;
+		out.body.entry.attr.nlink = 1;
+		out.body.entry.attr_valid = UINT64_MAX;
+		out.body.entry.entry_valid = UINT64_MAX;
+	})));
+}
+};
+
+struct inval_entry_args {
+	MockFS		*mock;
+	ino_t		parent;
+	const char	*name;
+	size_t		namelen;
+};
+
+static void* inval_entry(void* arg) {
+	const struct inval_entry_args *iea = (struct inval_entry_args*)arg;
+	ssize_t r;
+
+	r = iea->mock->notify_inval_entry(iea->parent, iea->name, iea->namelen);
+	if (r >= 0)
+		return 0;
+	else
+		return (void*)(intptr_t)errno;
+}
+
+/* Invalidate a nonexistent entry */
+TEST_F(Notify, inval_entry_nonexistent)
+{
+	const static char *name = "foo";
+	struct inval_entry_args iea;
+	void *thr0_value;
+	pthread_t th0;
+
+	iea.mock = m_mock;
+	iea.parent = FUSE_ROOT_ID;
+	iea.name = name;
+	iea.namelen = strlen(name);
+	ASSERT_EQ(0, pthread_create(&th0, NULL, inval_entry, &iea))
+		<< strerror(errno);
+	pthread_join(th0, &thr0_value);
+	/* It's not an error for an entry to not be cached */
+	EXPECT_EQ(0, (intptr_t)thr0_value);
+}
+
+/* Invalidate a cached entry */
+TEST_F(Notify, inval_entry)
+{
+	const static char FULLPATH[] = "mountpoint/foo";
+	const static char RELPATH[] = "foo";
+	struct inval_entry_args iea;
+	struct stat sb;
+	void *thr0_value;
+	uint64_t ino0 = 42;
+	uint64_t ino1 = 43;
+	Sequence seq;
+	pthread_t th0;
+
+	expect_lookup(FUSE_ROOT_ID, RELPATH, ino0, seq);
+	expect_lookup(FUSE_ROOT_ID, RELPATH, ino1, seq);
+
+	/* Fill the entry cache */
+	ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno);
+	EXPECT_EQ(ino0, sb.st_ino);
+
+	/* Now invalidate the entry */
+	iea.mock = m_mock;
+	iea.parent = FUSE_ROOT_ID;
+	iea.name = RELPATH;
+	iea.namelen = strlen(RELPATH);
+	ASSERT_EQ(0, pthread_create(&th0, NULL, inval_entry, &iea))
+		<< strerror(errno);
+	pthread_join(th0, &thr0_value);
+	/* The notification thread should report success */
+	EXPECT_EQ(0, (intptr_t)thr0_value);
+
+	/* The second lookup should return the alternate ino */
+	ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno);
+	EXPECT_EQ(ino1, sb.st_ino);
+}
+
+/*
+ * Invalidate a cached entry beneath the root, which uses a slightly different
+ * code path.
+ */
+TEST_F(Notify, inval_entry_below_root)
+{
+	const static char FULLPATH[] = "mountpoint/some_dir/foo";
+	const static char DNAME[] = "some_dir";
+	const static char FNAME[] = "foo";
+	struct inval_entry_args iea;
+	struct stat sb;
+	void *thr0_value;
+	uint64_t dir_ino = 41;
+	uint64_t ino0 = 42;
+	uint64_t ino1 = 43;
+	Sequence seq;
+	pthread_t th0;
+
+	EXPECT_LOOKUP(FUSE_ROOT_ID, DNAME)
+	.WillOnce(Invoke(
+		ReturnImmediate([=](auto in __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, entry);
+		out.body.entry.attr.mode = S_IFDIR | 0755;
+		out.body.entry.nodeid = dir_ino;
+		out.body.entry.attr.nlink = 2;
+		out.body.entry.attr_valid = UINT64_MAX;
+		out.body.entry.entry_valid = UINT64_MAX;
+	})));
+	expect_lookup(dir_ino, FNAME, ino0, seq);
+	expect_lookup(dir_ino, FNAME, ino1, seq);
+
+	/* Fill the entry cache */
+	ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno);
+	EXPECT_EQ(ino0, sb.st_ino);
+
+	/* Now invalidate the entry */
+	iea.mock = m_mock;
+	iea.parent = dir_ino;
+	iea.name = FNAME;
+	iea.namelen = strlen(FNAME);
+	ASSERT_EQ(0, pthread_create(&th0, NULL, inval_entry, &iea))
+		<< strerror(errno);
+	pthread_join(th0, &thr0_value);
+	/* The notification thread should report success */
+	EXPECT_EQ(0, (intptr_t)thr0_value);
+
+	/* The second lookup should return the alternate ino */
+	ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno);
+	EXPECT_EQ(ino1, sb.st_ino);
+}
+
+/* Invalidating an entry invalidates the parent directory's attributes */
+TEST_F(Notify, inval_entry_invalidates_parent_attrs)
+{
+	const static char FULLPATH[] = "mountpoint/foo";
+	const static char RELPATH[] = "foo";
+	struct inval_entry_args iea;
+	struct stat sb;
+	void *thr0_value;
+	uint64_t ino = 42;
+	Sequence seq;
+	pthread_t th0;
+
+	expect_lookup(FUSE_ROOT_ID, RELPATH, ino, seq);
+	EXPECT_CALL(*m_mock, process(
+		ResultOf([=](auto in) {
+			return (in.header.opcode == FUSE_GETATTR &&
+				in.header.nodeid == FUSE_ROOT_ID);
+		}, Eq(true)),
+		_)
+	).Times(2)
+	.WillRepeatedly(Invoke(ReturnImmediate([=](auto i __unused, auto& out) {
+		SET_OUT_HEADER_LEN(out, attr);
+		out.body.attr.attr.mode = S_IFDIR | 0755;
+		out.body.attr.attr_valid = UINT64_MAX;
+	})));
+
+	/* Fill the attr and entry cache */
+	ASSERT_EQ(0, stat("mountpoint", &sb)) << strerror(errno);
+	ASSERT_EQ(0, stat(FULLPATH, &sb)) << strerror(errno);
+
+	/* Now invalidate the entry */
+	iea.mock = m_mock;
+	iea.parent = FUSE_ROOT_ID;
+	iea.name = RELPATH;
+	iea.namelen = strlen(RELPATH);
+	ASSERT_EQ(0, pthread_create(&th0, NULL, inval_entry, &iea))
+		<< strerror(errno);
+	pthread_join(th0, &thr0_value);
+	/* The notification thread should report success */
+	EXPECT_EQ(0, (intptr_t)thr0_value);
+
+	/* /'s attribute cache should be cleared */
+	ASSERT_EQ(0, stat("mountpoint", &sb)) << strerror(errno);
+}