freebsd-dev/sys/fs/fuse/fuse_internal.c
Alan Somers bfcb817bcd Fix issues with FUSE_ACCESS when default_permissions is disabled
This patch fixes two issues relating to FUSE_ACCESS when the
default_permissions mount option is disabled:

* VOP_ACCESS() calls with VADMIN set should never be sent to a fuse server
  in the form of FUSE_ACCESS operations. The FUSE protocol has no equivalent
  of VADMIN, so we must evaluate such things kernel-side, regardless of the
  default_permissions setting.

* The FUSE protocol only requires FUSE_ACCESS to be sent for two purposes:
  for the access(2) syscall and to check directory permissions for
  searchability during lookup. FreeBSD sends it much more frequently, due to
  differences between our VFS and Linux's, for which FUSE was designed. But
  this patch does eliminate several cases not required by the FUSE protocol:

  * for any FUSE_*XATTR operation
  * when creating a new file
  * when deleting a file
  * when setting timestamps, such as by utimensat(2).

* Additionally, when default_permissions is disabled, this patch removes one
  FUSE_GETATTR operation when deleting a file.

PR:		245689
Reported by:	MooseFS FreeBSD Team <freebsd@moosefs.pro>
Reviewed by:	cem
MFC after:	2 weeks
Differential Revision:	https://reviews.freebsd.org/D24777
2020-05-22 18:11:17 +00:00

1258 lines
33 KiB
C

/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2007-2009 Google Inc. and Amit Singh
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following disclaimer
* in the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Google Inc. nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* Copyright (C) 2005 Csaba Henk.
* All rights reserved.
*
* Copyright (c) 2019 The FreeBSD Foundation
*
* Portions of this software were developed by BFF Storage Systems, LLC under
* sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/counter.h>
#include <sys/module.h>
#include <sys/errno.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/filedesc.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/dirent.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sysctl.h>
#include <sys/priv.h>
#include "fuse.h"
#include "fuse_file.h"
#include "fuse_internal.h"
#include "fuse_io.h"
#include "fuse_ipc.h"
#include "fuse_node.h"
#include "fuse_file.h"
/* The fusefs DTrace provider is only declared here, not defined. */
SDT_PROVIDER_DECLARE(fusefs);
/*
* Fuse trace probe:
* arg0: verbosity. Higher numbers give more verbose messages
* arg1: Textual message
*/
SDT_PROBE_DEFINE2(fusefs, , internal, trace, "int", "char*");
/*
* isbzero() is defined near the end of this file and is only compiled (and
* only used, by fuse_internal_readdir_processdata) when
* ZERO_PAD_INCOMPLETE_BUFS is defined.
*/
#ifdef ZERO_PAD_INCOMPLETE_BUFS
static int isbzero(void *buf, size_t len);
#endif
/*
* Entry-cache statistics, bumped by fuse_internal_get_cached_vnode().
* The counters are allocated in fuse_internal_init() and released in
* fuse_internal_destroy().
*/
counter_u64_t fuse_lookup_cache_hits;
counter_u64_t fuse_lookup_cache_misses;
/* Export both counters read-only under the _vfs_fusefs_stats sysctl node. */
SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_hits, CTLFLAG_RD,
&fuse_lookup_cache_hits, "number of positive cache hits in lookup");
SYSCTL_COUNTER_U64(_vfs_fusefs_stats, OID_AUTO, lookup_cache_misses, CTLFLAG_RD,
&fuse_lookup_cache_misses, "number of cache misses in lookup");
/*
 * Look up the vnode for inode number 'ino' in the vfs hash of 'mp'.
 *
 * On return *vpp is either NULL (no vnode found, or its entry cache timeout
 * has expired) or the vnode obtained from vfs_hash_get() with 'flags'.
 * An expired vnode is purged from the name cache and released with vput().
 *
 * Returns the error from vfs_hash_get(), otherwise 0.
 */
int
fuse_internal_get_cached_vnode(struct mount* mp, ino_t ino, int flags,
	struct vnode **vpp)
{
	struct bintime uptime;
	struct vnode *found;
	uint64_t nid = ino;
	int rc;

	*vpp = NULL;
	rc = vfs_hash_get(mp, fuse_vnode_hash(nid), flags, curthread, vpp,
	    fuse_vnode_cmp, &nid);
	if (rc != 0)
		return (rc);

	found = *vpp;
	if (found == NULL)
		return (0);

	/*
	 * The entry cache timeout is checked here, within fusefs, rather
	 * than through cache_enter_time/cache_lookup, because those routines
	 * are only intended to work with pathnames, not inodes.
	 */
	getbinuptime(&uptime);
	if (bintime_cmp(&(VTOFUD(found)->entry_cache_timeout), &uptime, >)) {
		/* Still valid: hand the vnode to the caller. */
		counter_u64_add(fuse_lookup_cache_hits, 1);
		return (0);
	}

	/* Entry cache timeout: drop the vnode and report "not found". */
	counter_u64_add(fuse_lookup_cache_misses, 1);
	cache_purge(found);
	vput(found);
	*vpp = NULL;
	return (0);
}
SDT_PROBE_DEFINE0(fusefs, , internal, access_vadmin);
/*
 * Check whether 'cred' may access 'vp' in the ways described by 'mode'.
 *
 * The checks are applied in this order:
 *  - an empty mode is always allowed;
 *  - a VMODIFY_PERMS request on a read-only mount fails with EROFS for
 *    directories, symlinks and regular files;
 *  - unless FSESS_DAEMON_CAN_SPY is set, credentials rejected by
 *    fuse_match_cred() get EPERM;
 *  - with FSESS_DEFAULT_PERMISSIONS the decision is made kernel-side by
 *    vaccess() on the vnode's attributes;
 *  - otherwise a FUSE_ACCESS operation is sent synchronously to the server,
 *    unless the server is already known not to implement it.
 *
 * Returns 0 if access is granted, otherwise an errno value.
 */
int
fuse_internal_access(struct vnode *vp,
    accmode_t mode,
    struct thread *td,
    struct ucred *cred)
{
	int err = 0;
	uint32_t mask = F_OK;
	int dataflags;
	struct mount *mp;
	struct fuse_dispatcher fdi;
	struct fuse_access_in *fai;
	struct fuse_data *data;

	mp = vnode_mount(vp);
	data = fuse_get_mpdata(mp);
	dataflags = data->dataflags;

	if (mode == 0)
		return 0;

	if (mode & VMODIFY_PERMS && vfs_isrdonly(mp)) {
		switch (vp->v_type) {
		case VDIR:
			/* FALLTHROUGH */
		case VLNK:
			/* FALLTHROUGH */
		case VREG:
			return EROFS;
		default:
			break;
		}
	}

	/* Unless explicitly permitted, deny everyone except the fs owner. */
	if (!(dataflags & FSESS_DAEMON_CAN_SPY)) {
		if (fuse_match_cred(data->daemoncred, cred))
			return EPERM;
	}

	if (dataflags & FSESS_DEFAULT_PERMISSIONS) {
		struct vattr va;

		/*
		 * Don't evaluate permissions against 'va' unless it was
		 * actually filled in; on failure its contents are
		 * uninitialized stack.
		 */
		err = fuse_internal_getattr(vp, &va, cred, td);
		if (err != 0)
			return err;
		return vaccess(vp->v_type, va.va_mode, va.va_uid,
		    va.va_gid, mode, cred, NULL);
	}

	if (mode & VADMIN) {
		/*
		 * The FUSE protocol doesn't have an equivalent of VADMIN, so
		 * it's a bug if we ever reach this point with that bit set.
		 */
		SDT_PROBE0(fusefs, , internal, access_vadmin);
	}

	if (!fsess_isimpl(mp, FUSE_ACCESS))
		return 0;

	/* Translate the VFS accmode bits into an access(2)-style mask. */
	if ((mode & (VWRITE | VAPPEND)) != 0)
		mask |= W_OK;
	if ((mode & VREAD) != 0)
		mask |= R_OK;
	if ((mode & VEXEC) != 0)
		mask |= X_OK;

	fdisp_init(&fdi, sizeof(*fai));
	fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred);

	fai = fdi.indata;
	fai->mask = mask;

	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);

	if (err == ENOSYS) {
		/* Remember that the server lacks FUSE_ACCESS and allow. */
		fsess_set_notimpl(mp, FUSE_ACCESS);
		err = 0;
	}
	return err;
}
/*
 * Convert the FUSE attributes in 'attr' to a struct vattr.
 *
 * When the validity period (attr_valid, attr_valid_nsec) is non-zero the
 * result is stored in the vnode's attribute cache, and additionally copied
 * to 'vap' if that is not NULL.  When the validity period is zero nothing is
 * cached on 'vp'; the result is written to 'vap' only, or dropped if 'vap'
 * is NULL.
 *
 * The vnode's attr_cache_timeout is always updated, and for regular files a
 * size change is propagated through fuse_vnode_setsize().
 */
void
fuse_internal_cache_attrs(struct vnode *vp, struct fuse_attr *attr,
	uint64_t attr_valid, uint32_t attr_valid_nsec, struct vattr *vap)
{
	struct mount *mp = vnode_mount(vp);
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct fuse_data *data = fuse_get_mpdata(mp);
	struct vattr *dst;
	bool cacheable;

	ASSERT_VOP_ELOCKED(vp, "fuse_internal_cache_attrs");

	fuse_validity_2_bintime(attr_valid, attr_valid_nsec,
	    &fvdat->attr_cache_timeout);

	/* Fix our buffers if the filesize changed without us knowing */
	if (vnode_isreg(vp) && attr->size != fvdat->cached_attrs.va_size) {
		(void)fuse_vnode_setsize(vp, attr->size);
		fvdat->cached_attrs.va_size = attr->size;
	}

	/* Pick where the converted attributes go, if anywhere. */
	cacheable = (attr_valid != 0 || attr_valid_nsec != 0);
	if (!cacheable && vap == NULL)
		return;
	if (cacheable)
		dst = &(fvdat->cached_attrs);
	else
		dst = vap;

	vattr_null(dst);
	dst->va_fsid = mp->mnt_stat.f_fsid.val[0];
	dst->va_fileid = attr->ino;
	dst->va_mode = attr->mode & ~S_IFMT;
	dst->va_nlink = attr->nlink;
	dst->va_uid = attr->uid;
	dst->va_gid = attr->gid;
	dst->va_rdev = attr->rdev;
	dst->va_size = attr->size;
	/* XXX on i386, seconds are truncated to 32 bits */
	dst->va_atime.tv_sec = attr->atime;
	dst->va_atime.tv_nsec = attr->atimensec;
	dst->va_mtime.tv_sec = attr->mtime;
	dst->va_mtime.tv_nsec = attr->mtimensec;
	dst->va_ctime.tv_sec = attr->ctime;
	dst->va_ctime.tv_nsec = attr->ctimensec;
	/* blksize is only taken from servers speaking protocol >= 7.9 */
	if (fuse_libabi_geq(data, 7, 9) && attr->blksize > 0) {
		dst->va_blocksize = attr->blksize;
	} else {
		dst->va_blocksize = PAGE_SIZE;
	}
	dst->va_type = IFTOVT(attr->mode);
	dst->va_bytes = attr->blocks * S_BLKSIZE;
	dst->va_flags = 0;

	/* The caller wants a copy in addition to the cached one. */
	if (vap != NULL && vap != dst)
		memcpy(vap, dst, sizeof(*vap));
}
/* fsync */
int
fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio)
{
if (tick->tk_aw_ohead.error == ENOSYS) {
fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick));
}
return 0;
}
/*
 * Send FUSE_FSYNC (or FUSE_FSYNCDIR for a directory) for every open file
 * handle of 'vp'.
 *
 * With waitfor == MNT_WAIT each request is waited for; otherwise the
 * requests are queued with fuse_internal_fsync_callback as their completion
 * callback.  'datasync' sets bit 0 of fsync_flags in the request.
 *
 * Returns 0 if the server does not implement the operation, otherwise the
 * result of the last request that was waited for.
 */
int
fuse_internal_fsync(struct vnode *vp,
    struct thread *td,
    int waitfor,
    bool datasync)
{
	struct fuse_fsync_in *ffsi = NULL;
	struct fuse_dispatcher fdi;
	struct fuse_filehandle *fufh;
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct mount *mp = vnode_mount(vp);
	int op = FUSE_FSYNC;
	int err = 0;

	if (vnode_isdir(vp))
		op = FUSE_FSYNCDIR;

	/* Nothing to do if the server is known not to implement 'op'. */
	if (!fsess_isimpl(mp, op))
		return 0;

	fdisp_init(&fdi, sizeof(*ffsi));
	/*
	 * fsync every open file handle for this file, because we can't be sure
	 * which file handle the caller is really referring to.
	 */
	LIST_FOREACH(fufh, &fvdat->handles, next) {
		fdi.iosize = sizeof(*ffsi);
		/* The first pass creates the request; later passes reuse it. */
		if (ffsi == NULL)
			fdisp_make_vp(&fdi, op, vp, td, NULL);
		else
			fdisp_refresh_vp(&fdi, op, vp, td, NULL);
		ffsi = fdi.indata;
		ffsi->fh = fufh->fh_id;
		ffsi->fsync_flags = 0;

		if (datasync)
			ffsi->fsync_flags = 1;

		if (waitfor == MNT_WAIT) {
			err = fdisp_wait_answ(&fdi);
		} else {
			fuse_insert_callback(fdi.tick,
			    fuse_internal_fsync_callback);
			fuse_insert_message(fdi.tick, false);
		}
		if (err == ENOSYS) {
			/* ENOSYS means "success, and don't call again" */
			fsess_set_notimpl(mp, op);
			err = 0;
			break;
		}
	}
	fdisp_destroy(&fdi);

	return err;
}
/* Asynchronous invalidation */
SDT_PROBE_DEFINE3(fusefs, , internal, invalidate_entry,
"struct vnode*", "struct fuse_notify_inval_entry_out*", "char*");
int
fuse_internal_invalidate_entry(struct mount *mp, struct uio *uio)
{
struct fuse_notify_inval_entry_out fnieo;
struct componentname cn;
struct vnode *dvp, *vp;
char name[PATH_MAX];
int err;
if ((err = uiomove(&fnieo, sizeof(fnieo), uio)) != 0)
return (err);
if (fnieo.namelen >= sizeof(name))
return (EINVAL);
if ((err = uiomove(name, fnieo.namelen, uio)) != 0)
return (err);
name[fnieo.namelen] = '\0';
/* fusefs does not cache "." or ".." entries */
if (strncmp(name, ".", sizeof(".")) == 0 ||
strncmp(name, "..", sizeof("..")) == 0)
return (0);
if (fnieo.parent == FUSE_ROOT_ID)
err = VFS_ROOT(mp, LK_SHARED, &dvp);
else
err = fuse_internal_get_cached_vnode( mp, fnieo.parent,
LK_SHARED, &dvp);
SDT_PROBE3(fusefs, , internal, invalidate_entry, dvp, &fnieo, name);
/*
* If dvp is not in the cache, then it must've been reclaimed. And
* since fuse_vnop_reclaim does a cache_purge, name's entry must've
* been invalidated already. So we can safely return if dvp == NULL
*/
if (err != 0 || dvp == NULL)
return (err);
/*
* XXX we can't check dvp's generation because the FUSE invalidate
* entry message doesn't include it. Worse case is that we invalidate
* an entry that didn't need to be invalidated.
*/
cn.cn_nameiop = LOOKUP;
cn.cn_flags = 0; /* !MAKEENTRY means free cached entry */
cn.cn_thread = curthread;
cn.cn_cred = curthread->td_ucred;
cn.cn_lkflags = LK_SHARED;
cn.cn_pnbuf = NULL;
cn.cn_nameptr = name;
cn.cn_namelen = fnieo.namelen;
err = cache_lookup(dvp, &vp, &cn, NULL, NULL);
MPASS(err == 0);
fuse_vnode_clear_attr_cache(dvp);
vput(dvp);
return (0);
}
SDT_PROBE_DEFINE2(fusefs, , internal, invalidate_inode,
"struct vnode*", "struct fuse_notify_inval_inode_out *");
int
fuse_internal_invalidate_inode(struct mount *mp, struct uio *uio)
{
struct fuse_notify_inval_inode_out fniio;
struct vnode *vp;
int err;
if ((err = uiomove(&fniio, sizeof(fniio), uio)) != 0)
return (err);
if (fniio.ino == FUSE_ROOT_ID)
err = VFS_ROOT(mp, LK_EXCLUSIVE, &vp);
else
err = fuse_internal_get_cached_vnode(mp, fniio.ino, LK_SHARED,
&vp);
SDT_PROBE2(fusefs, , internal, invalidate_inode, vp, &fniio);
if (err != 0 || vp == NULL)
return (err);
/*
* XXX we can't check vp's generation because the FUSE invalidate
* entry message doesn't include it. Worse case is that we invalidate
* an inode that didn't need to be invalidated.
*/
/*
* Flush and invalidate buffers if off >= 0. Technically we only need
* to flush and invalidate the range of offsets [off, off + len), but
* for simplicity's sake we do everything.
*/
if (fniio.off >= 0)
fuse_io_invalbuf(vp, curthread);
fuse_vnode_clear_attr_cache(vp);
vput(vp);
return (0);
}
/*
 * mknod
 *
 * Create a new node named by 'cnp' in directory 'dvp' by sending FUSE_MKNOD
 * with the type, mode and rdev from 'vap'.  Servers speaking protocol 7.12
 * or later also receive the calling process's umask; older servers get the
 * shorter FUSE_COMPAT_MKNOD_IN_SIZE request.
 *
 * Returns the result of fuse_internal_newentry().
 */
int
fuse_internal_mknod(struct vnode *dvp, struct vnode **vpp,
	struct componentname *cnp, struct vattr *vap)
{
	struct fuse_data *data;
	struct fuse_mknod_in fmni;
	size_t insize;

	data = fuse_get_mpdata(dvp->v_mount);

	/*
	 * Zero the whole request first: it is copied verbatim into the
	 * message sent to the userspace server, so any member we don't set
	 * explicitly must not carry leftover kernel stack contents.
	 */
	bzero(&fmni, sizeof(fmni));
	fmni.mode = MAKEIMODE(vap->va_type, vap->va_mode);
	fmni.rdev = vap->va_rdev;
	if (fuse_libabi_geq(data, 7, 12)) {
		insize = sizeof(fmni);
		fmni.umask = curthread->td_proc->p_fd->fd_cmask;
	} else {
		insize = FUSE_COMPAT_MKNOD_IN_SIZE;
	}
	return (fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKNOD, &fmni,
	    insize, vap->va_type));
}
/*
 * readdir
 *
 * Repeatedly send FUSE_READDIR on file handle 'fufh' and feed each reply to
 * fuse_internal_readdir_processdata() until 'uio' has no space left or the
 * processing routine reports completion or an error.
 *
 * Returns 0 on success (a -1 "finished" result from the processing routine
 * is mapped to 0), otherwise an errno value.
 */
int
fuse_internal_readdir(struct vnode *vp,
    struct uio *uio,
    off_t startoff,
    struct fuse_filehandle *fufh,
    struct fuse_iov *cookediov,
    int *ncookies,
    u_long *cookies)
{
	struct fuse_dispatcher fdi;
	struct fuse_read_in *req = NULL;
	int started;
	int rc = 0;

	if (uio_resid(uio) == 0)
		return (0);

	fdisp_init(&fdi, 0);

	/*
	 * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p
	 * I/O).
	 */

	/*
	 * 'started' becomes non-zero once the offset in the directory gets
	 * to startoff.  This is done because directories must be read from
	 * the beginning (offset == 0) when fuse_vnop_readdir() needs to do
	 * an open of the directory.  If it is not set here, it is set in
	 * fuse_internal_readdir_processdata() when uio_offset == startoff.
	 */
	started = (uio->uio_offset == startoff);

	while (uio_resid(uio) > 0) {
		fdi.iosize = sizeof(*req);
		/* Build the request the first time round, then recycle it. */
		if (req != NULL)
			fdisp_refresh_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);
		else
			fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL);

		req = fdi.indata;
		req->fh = fufh->fh_id;
		req->offset = uio_offset(uio);
		req->size = MIN(uio->uio_resid,
		    fuse_get_mpdata(vp->v_mount)->max_read);

		rc = fdisp_wait_answ(&fdi);
		if (rc != 0)
			break;

		rc = fuse_internal_readdir_processdata(uio, startoff,
		    &started, req->size, fdi.answ, fdi.iosize, cookediov,
		    ncookies, &cookies);
		if (rc != 0)
			break;
	}

	fdisp_destroy(&fdi);
	if (rc == -1)
		return (0);
	return (rc);
}
/*
* Convert the fuse_dirent records of one FUSE_READDIR reply ('buf', 'bufsize'
* bytes) into struct dirent entries and copy them out through 'uio'.
*
* Records are skipped until the one at directory offset 'startoff' has been
* seen (*fnd_start != 0); after that each record is rebuilt in 'cookediov'
* and moved to 'uio'. If 'cookies' (*cookiesp) is not NULL, the offset of
* every copied entry is stored there and *ncookies is decremented. The uio
* offset is set to the offset recorded in each processed record.
*
* Return -1 to indicate that this readdir is finished, 0 if it copied
* all the directory data read in and it may be possible to read more
* and greater than 0 for a failure.
*
* NOTE(review): as written, every path out of the loop sets err to a
* non-zero value, so 0 does not appear to be returned in practice; 'reqsize'
* is not used by this function.
*/
int
fuse_internal_readdir_processdata(struct uio *uio,
off_t startoff,
int *fnd_start,
size_t reqsize,
void *buf,
size_t bufsize,
struct fuse_iov *cookediov,
int *ncookies,
u_long **cookiesp)
{
int err = 0;
int bytesavail;
size_t freclen;
struct dirent *de;
struct fuse_dirent *fudge;
u_long *cookies;
cookies = *cookiesp;
/* A reply too short to hold even a record header means we are done. */
if (bufsize < FUSE_NAME_OFFSET)
return -1;
for (;;) {
/* Fewer bytes left than a record header: end of this buffer. */
if (bufsize < FUSE_NAME_OFFSET) {
err = -1;
break;
}
fudge = (struct fuse_dirent *)buf;
freclen = FUSE_DIRENT_SIZE(fudge);
if (bufsize < freclen) {
/*
* This indicates a partial directory entry at the
* end of the directory data.
*/
err = -1;
break;
}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
/* An all-zero record header is treated as the end of the data. */
if (isbzero(buf, FUSE_NAME_OFFSET)) {
err = -1;
break;
}
#endif
/* Reject empty names and names longer than MAXNAMLEN. */
if (!fudge->namelen || fudge->namelen > MAXNAMLEN) {
err = EINVAL;
break;
}
/* Size of the struct dirent that this record converts to. */
bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *)
&fudge->namelen);
if (bytesavail > uio_resid(uio)) {
/* Out of space for the dir so we are done. */
err = -1;
break;
}
/*
* Don't start to copy the directory entries out until
* the requested offset in the directory is found.
*/
if (*fnd_start != 0) {
/* Build a zeroed dirent of exactly bytesavail bytes. */
fiov_adjust(cookediov, bytesavail);
bzero(cookediov->base, bytesavail);
de = (struct dirent *)cookediov->base;
de->d_fileno = fudge->ino;
de->d_reclen = bytesavail;
de->d_type = fudge->type;
de->d_namlen = fudge->namelen;
/* Copy the name into the tail (d_name area) of the dirent. */
memcpy((char *)cookediov->base + sizeof(struct dirent) -
MAXNAMLEN - 1,
(char *)buf + FUSE_NAME_OFFSET, fudge->namelen);
dirent_terminate(de);
err = uiomove(cookediov->base, cookediov->len, uio);
if (err)
break;
if (cookies != NULL) {
/* No cookie slots left: stop after this entry. */
if (*ncookies == 0) {
err = -1;
break;
}
*cookies = fudge->off;
cookies++;
(*ncookies)--;
}
} else if (startoff == fudge->off)
*fnd_start = 1;
/* Advance to the next record in the reply buffer. */
buf = (char *)buf + freclen;
bufsize -= freclen;
/* Overrides the offset advanced by uiomove() with the record's own. */
uio_setoffset(uio, fudge->off);
}
/* Hand the advanced cookie cursor back to the caller. */
*cookiesp = cookies;
return err;
}
/* remove */
int
fuse_internal_remove(struct vnode *dvp,
struct vnode *vp,
struct componentname *cnp,
enum fuse_opcode op)
{
struct fuse_dispatcher fdi;
nlink_t nlink;
int err = 0;
fdisp_init(&fdi, cnp->cn_namelen + 1);
fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred);
memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
((char *)fdi.indata)[cnp->cn_namelen] = '\0';
err = fdisp_wait_answ(&fdi);
fdisp_destroy(&fdi);
if (err)
return (err);
/*
* Access the cached nlink even if the attr cached has expired. If
* it's inaccurate, the worst that will happen is:
* 1) We'll recycle the vnode even though the file has another link we
* don't know about, costing a bit of cpu time, or
* 2) We won't recycle the vnode even though all of its links are gone.
* It will linger around until vnlru reclaims it, costing a bit of
* temporary memory.
*/
nlink = VTOFUD(vp)->cached_attrs.va_nlink--;
/*
* Purge the parent's attribute cache because the daemon
* should've updated its mtime and ctime.
*/
fuse_vnode_clear_attr_cache(dvp);
/* NB: nlink could be zero if it was never cached */
if (nlink <= 1 || vnode_vtype(vp) == VDIR) {
fuse_internal_vnode_disappear(vp);
} else {
cache_purge(vp);
fuse_vnode_update(vp, FN_CTIMECHANGE);
}
return err;
}
/*
 * rename
 *
 * Send FUSE_RENAME for the entry 'fcnp' in directory 'fdvp', to be renamed
 * to 'tcnp' in directory 'tdvp'.  The request body is a struct
 * fuse_rename_in followed by the two NUL-terminated names, source first.
 *
 * Returns the result of the request.
 */
int
fuse_internal_rename(struct vnode *fdvp,
    struct componentname *fcnp,
    struct vnode *tdvp,
    struct componentname *tcnp)
{
	struct fuse_dispatcher fdi;
	struct fuse_rename_in *req;
	char *cursor;
	int rc = 0;

	fdisp_init(&fdi, sizeof(*req) + fcnp->cn_namelen + tcnp->cn_namelen + 2);
	fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred);

	req = fdi.indata;
	req->newdir = VTOI(tdvp);

	/* Source name, right after the fixed-size header. */
	cursor = (char *)fdi.indata + sizeof(*req);
	memcpy(cursor, fcnp->cn_nameptr, fcnp->cn_namelen);
	cursor[fcnp->cn_namelen] = '\0';

	/* Destination name, right after the source name's terminator. */
	cursor += fcnp->cn_namelen + 1;
	memcpy(cursor, tcnp->cn_nameptr, tcnp->cn_namelen);
	cursor[tcnp->cn_namelen] = '\0';

	rc = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);
	return (rc);
}
/* strategy */
/* entity creation */
/*
 * Build, but do not send, an entry-creation request of type 'op' on
 * directory node 'dnid'.  The body is the 'bufsize' bytes at 'buf' followed
 * by the NUL-terminated name from 'cnp'.  'fdip' must already have been
 * initialized by the caller.
 */
void
fuse_internal_newentry_makerequest(struct mount *mp,
    uint64_t dnid,
    struct componentname *cnp,
    enum fuse_opcode op,
    void *buf,
    size_t bufsize,
    struct fuse_dispatcher *fdip)
{
	char *name_dst;

	fdip->iosize = bufsize + cnp->cn_namelen + 1;
	fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred);

	/* Opcode-specific header first ... */
	memcpy(fdip->indata, buf, bufsize);

	/* ... then the new entry's name and its terminator. */
	name_dst = (char *)fdip->indata + bufsize;
	memcpy(name_dst, cnp->cn_nameptr, cnp->cn_namelen);
	name_dst[cnp->cn_namelen] = '\0';
}
/*
 * Send the already-built entry-creation request in 'fdip', validate the
 * returned fuse_entry_out and obtain the vnode for the new entry in *vpp.
 *
 * If the vnode cannot be obtained, a FUSE_FORGET is sent for the new node.
 * On success the parent's attribute cache is cleared and the new vnode's
 * attributes are cached.
 *
 * Returns 0 on success, otherwise the first error encountered.
 */
int
fuse_internal_newentry_core(struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum vtype vtyp,
    struct fuse_dispatcher *fdip)
{
	struct mount *mp = vnode_mount(dvp);
	struct fuse_entry_out *entry;
	int rc = 0;

	rc = fdisp_wait_answ(fdip);
	if (rc != 0)
		return (rc);

	entry = fdip->answ;
	rc = fuse_internal_checkentry(entry, vtyp);
	if (rc != 0)
		return (rc);

	rc = fuse_vnode_get(mp, entry, entry->nodeid, dvp, vpp, cnp, vtyp);
	if (rc != 0) {
		/* Give back the lookup reference the server just granted. */
		fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred,
		    entry->nodeid, 1);
		return (rc);
	}

	/*
	 * Purge the parent's attribute cache because the daemon should've
	 * updated its mtime and ctime
	 */
	fuse_vnode_clear_attr_cache(dvp);

	fuse_internal_cache_attrs(*vpp, &entry->attr, entry->attr_valid,
	    entry->attr_valid_nsec, NULL);

	return (rc);
}
/*
 * Create a new directory entry in one step: build the request with
 * fuse_internal_newentry_makerequest(), then send and process it with
 * fuse_internal_newentry_core().
 *
 * Returns the result of fuse_internal_newentry_core().
 */
int
fuse_internal_newentry(struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum fuse_opcode op,
    void *buf,
    size_t bufsize,
    enum vtype vtype)
{
	struct fuse_dispatcher disp;
	struct mount *mp = vnode_mount(dvp);
	int rc;

	fdisp_init(&disp, 0);
	fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf,
	    bufsize, &disp);
	rc = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &disp);
	fdisp_destroy(&disp);

	return (rc);
}
/* entity destruction */
int
fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio)
{
fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL,
((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1);
return 0;
}
/*
 * Queue a FUSE_FORGET for 'nodeid' that releases 'nlookup' lookups.  The
 * message is inserted without waiting for an answer.
 */
void
fuse_internal_forget_send(struct mount *mp,
    struct thread *td,
    struct ucred *cred,
    uint64_t nodeid,
    uint64_t nlookup)
{
	struct fuse_dispatcher disp;
	struct fuse_forget_in *req;

	/*
	 * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu",
	 *         (long long unsigned) nodeid));
	 */

	fdisp_init(&disp, sizeof(*req));
	fdisp_make(&disp, FUSE_FORGET, mp, nodeid, td, cred);

	req = disp.indata;
	req->nlookup = nlookup;

	fuse_insert_message(disp.tick, false);
	fdisp_destroy(&disp);
}
SDT_PROBE_DEFINE2(fusefs, , internal, getattr_cache_incoherent,
"struct vnode*", "struct fuse_attr_out*");
/*
* Fetch the vnode's attributes from the daemon with a synchronous
* FUSE_GETATTR, bypassing the attribute cache.
*
* The result is passed to fuse_internal_cache_attrs(), which also fills in
* 'vap' if it is not NULL. Size, ctime and mtime values that are flagged as
* locally changed (FN_SIZECHANGE, FN_CTIMECHANGE, FN_MTIMECHANGE) override
* what the server reports.
*
* Returns 0 on success, the error from the request, or ENOENT if the server
* reports a different file type than the vnode has. In the ENOENT cases the
* vnode is marked as disappeared.
*/
int
fuse_internal_do_getattr(struct vnode *vp, struct vattr *vap,
struct ucred *cred, struct thread *td)
{
struct fuse_dispatcher fdi;
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_getattr_in *fgai;
struct fuse_attr_out *fao;
/* Snapshot the cached values before the request is sent. */
off_t old_filesize = fvdat->cached_attrs.va_size;
struct timespec old_ctime = fvdat->cached_attrs.va_ctime;
struct timespec old_mtime = fvdat->cached_attrs.va_mtime;
enum vtype vtyp;
int err;
fdisp_init(&fdi, sizeof(*fgai));
fdisp_make_vp(&fdi, FUSE_GETATTR, vp, td, cred);
fgai = fdi.indata;
/*
* We could look up a file handle and set it in fgai->fh, but that
* involves extra runtime work and I'm unaware of any file systems that
* care.
*/
fgai->getattr_flags = 0;
if ((err = fdisp_wait_answ(&fdi))) {
/* The server no longer knows this node. */
if (err == ENOENT)
fuse_internal_vnode_disappear(vp);
goto out;
}
fao = (struct fuse_attr_out *)fdi.answ;
vtyp = IFTOVT(fao->attr.mode);
/* Locally modified values take precedence over the server's answer. */
if (fvdat->flag & FN_SIZECHANGE)
fao->attr.size = old_filesize;
if (fvdat->flag & FN_CTIMECHANGE) {
fao->attr.ctime = old_ctime.tv_sec;
fao->attr.ctimensec = old_ctime.tv_nsec;
}
if (fvdat->flag & FN_MTIMECHANGE) {
fao->attr.mtime = old_mtime.tv_sec;
fao->attr.mtimensec = old_mtime.tv_nsec;
}
if (vnode_isreg(vp) &&
fvdat->cached_attrs.va_size != VNOVAL &&
fao->attr.size != fvdat->cached_attrs.va_size) {
/*
* The server changed the file's size even though we had it
* cached! That's a server bug.
*/
SDT_PROBE2(fusefs, , internal, getattr_cache_incoherent, vp,
fao);
printf("%s: cache incoherent on %s! "
"Buggy FUSE server detected. To prevent data corruption, "
"disable the data cache by mounting with -o direct_io, or "
"as directed otherwise by your FUSE server's "
"documentation\n", __func__,
vnode_mount(vp)->mnt_stat.f_mntonname);
/* Invalidate the buffers over the whole offset range of the file. */
int iosize = fuse_iosize(vp);
v_inval_buf_range(vp, 0, INT64_MAX, iosize);
}
fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
fao->attr_valid_nsec, vap);
/* The node changed type behind our back: treat it as gone. */
if (vtyp != vnode_vtype(vp)) {
fuse_internal_vnode_disappear(vp);
err = ENOENT;
}
out:
fdisp_destroy(&fdi);
return err;
}
/*
 * Read a vnode's attributes into 'vap': from the attribute cache when
 * VTOVA() yields cached attributes, otherwise by asking the fuse daemon
 * through fuse_internal_do_getattr().
 *
 * Returns 0 on a cache hit, otherwise the result of the daemon request.
 */
int
fuse_internal_getattr(struct vnode *vp, struct vattr *vap, struct ucred *cred,
	struct thread *td)
{
	struct vattr *cached = VTOVA(vp);

	if (cached == NULL)
		return (fuse_internal_do_getattr(vp, vap, cred, td));

	*vap = *cached;	/* struct copy */
	return (0);
}
void
fuse_internal_vnode_disappear(struct vnode *vp)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
fvdat->flag |= FN_REVOKED;
cache_purge(vp);
}
/* fuse start/stop */
SDT_PROBE_DEFINE2(fusefs, , internal, init_done,
"struct fuse_data*", "struct fuse_init_out*");
int
fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio)
{
int err = 0;
struct fuse_data *data = tick->tk_data;
struct fuse_init_out *fiio;
if ((err = tick->tk_aw_ohead.error)) {
goto out;
}
if ((err = fticket_pull(tick, uio))) {
goto out;
}
fiio = fticket_resp(tick)->base;
data->fuse_libabi_major = fiio->major;
data->fuse_libabi_minor = fiio->minor;
if (!fuse_libabi_geq(data, 7, 4)) {
/*
* With a little work we could support servers as old as 7.1.
* But there would be little payoff.
*/
SDT_PROBE2(fusefs, , internal, trace, 1,
"userpace version too low");
err = EPROTONOSUPPORT;
goto out;
}
if (fuse_libabi_geq(data, 7, 5)) {
if (fticket_resp(tick)->len == sizeof(struct fuse_init_out) ||
fticket_resp(tick)->len == FUSE_COMPAT_22_INIT_OUT_SIZE) {
data->max_write = fiio->max_write;
if (fiio->flags & FUSE_ASYNC_READ)
data->dataflags |= FSESS_ASYNC_READ;
if (fiio->flags & FUSE_POSIX_LOCKS)
data->dataflags |= FSESS_POSIX_LOCKS;
if (fiio->flags & FUSE_EXPORT_SUPPORT)
data->dataflags |= FSESS_EXPORT_SUPPORT;
/*
* Don't bother to check FUSE_BIG_WRITES, because it's
* redundant with max_write
*/
/*
* max_background and congestion_threshold are not
* implemented
*/
} else {
err = EINVAL;
}
} else {
/* Old fixed values */
data->max_write = 4096;
}
if (fuse_libabi_geq(data, 7, 6))
data->max_readahead_blocks = fiio->max_readahead / maxbcachebuf;
if (!fuse_libabi_geq(data, 7, 7))
fsess_set_notimpl(data->mp, FUSE_INTERRUPT);
if (!fuse_libabi_geq(data, 7, 8)) {
fsess_set_notimpl(data->mp, FUSE_BMAP);
fsess_set_notimpl(data->mp, FUSE_DESTROY);
}
if (fuse_libabi_geq(data, 7, 23) && fiio->time_gran >= 1 &&
fiio->time_gran <= 1000000000)
data->time_gran = fiio->time_gran;
else
data->time_gran = 1;
if (!fuse_libabi_geq(data, 7, 23))
data->cache_mode = fuse_data_cache_mode;
else if (fiio->flags & FUSE_WRITEBACK_CACHE)
data->cache_mode = FUSE_CACHE_WB;
else
data->cache_mode = FUSE_CACHE_WT;
out:
if (err) {
fdata_set_dead(data);
}
FUSE_LOCK();
data->dataflags |= FSESS_INITED;
SDT_PROBE2(fusefs, , internal, init_done, data, fiio);
wakeup(&data->ticketer);
FUSE_UNLOCK();
return 0;
}
void
fuse_internal_send_init(struct fuse_data *data, struct thread *td)
{
struct fuse_init_in *fiii;
struct fuse_dispatcher fdi;
fdisp_init(&fdi, sizeof(*fiii));
fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL);
fiii = fdi.indata;
fiii->major = FUSE_KERNEL_VERSION;
fiii->minor = FUSE_KERNEL_MINOR_VERSION;
/*
* fusefs currently reads ahead no more than one cache block at a time.
* See fuse_read_biobackend
*/
fiii->max_readahead = maxbcachebuf;
/*
* Unsupported features:
* FUSE_FILE_OPS: No known FUSE server or client supports it
* FUSE_ATOMIC_O_TRUNC: our VFS cannot support it
* FUSE_DONT_MASK: unlike Linux, FreeBSD always applies the umask, even
* when default ACLs are in use.
* FUSE_SPLICE_WRITE, FUSE_SPLICE_MOVE, FUSE_SPLICE_READ: FreeBSD
* doesn't have splice(2).
* FUSE_FLOCK_LOCKS: not yet implemented
* FUSE_HAS_IOCTL_DIR: not yet implemented
* FUSE_AUTO_INVAL_DATA: not yet implemented
* FUSE_DO_READDIRPLUS: not yet implemented
* FUSE_READDIRPLUS_AUTO: not yet implemented
* FUSE_ASYNC_DIO: not yet implemented
* FUSE_NO_OPEN_SUPPORT: not yet implemented
*/
fiii->flags = FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_EXPORT_SUPPORT
| FUSE_BIG_WRITES | FUSE_WRITEBACK_CACHE;
fuse_insert_callback(fdi.tick, fuse_internal_init_callback);
fuse_insert_message(fdi.tick, false);
fdisp_destroy(&fdi);
}
/*
 * Send a FUSE_SETATTR operation with no permissions checks.  If cred is NULL,
 * send the request with root credentials.
 *
 * Every field of 'vap' that is not VNOVAL is sent to the server.  In
 * addition, locally dirtied mtime/ctime values (FN_MTIMECHANGE,
 * FN_CTIMECHANGE) are flushed along with the request.  If nothing needs to
 * be changed, no request is sent.
 *
 * On success the attributes returned by the server are cached.  Returns 0 on
 * success, the error from the request, or EAGAIN if the server reports a
 * different file type than the vnode has.
 */
int fuse_internal_setattr(struct vnode *vp, struct vattr *vap,
	struct thread *td, struct ucred *cred)
{
	struct fuse_vnode_data *fvdat;
	struct fuse_dispatcher fdi;
	struct fuse_setattr_in *fsai;
	struct mount *mp;
	pid_t pid = td->td_proc->p_pid;
	struct fuse_data *data;
	int err = 0;
	enum vtype vtyp;

	mp = vnode_mount(vp);
	fvdat = VTOFUD(vp);
	data = fuse_get_mpdata(mp);

	fdisp_init(&fdi, sizeof(*fsai));
	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
	if (!cred) {
		fdi.finh->uid = 0;
		fdi.finh->gid = 0;
	}
	fsai = fdi.indata;
	fsai->valid = 0;

	if (vap->va_uid != (uid_t)VNOVAL) {
		fsai->uid = vap->va_uid;
		fsai->valid |= FATTR_UID;
	}
	if (vap->va_gid != (gid_t)VNOVAL) {
		fsai->gid = vap->va_gid;
		fsai->valid |= FATTR_GID;
	}
	if (vap->va_size != VNOVAL) {
		struct fuse_filehandle *fufh = NULL;

		/* Truncate to a new value. */
		fsai->size = vap->va_size;
		fsai->valid |= FATTR_SIZE;

		/* Pass along a writable file handle if one is open. */
		fuse_filehandle_getrw(vp, FWRITE, &fufh, cred, pid);
		if (fufh) {
			fsai->fh = fufh->fh_id;
			fsai->valid |= FATTR_FH;
		}
		VTOFUD(vp)->flag &= ~FN_SIZECHANGE;
	}
	if (vap->va_atime.tv_sec != VNOVAL) {
		fsai->atime = vap->va_atime.tv_sec;
		fsai->atimensec = vap->va_atime.tv_nsec;
		fsai->valid |= FATTR_ATIME;
		if (vap->va_vaflags & VA_UTIMES_NULL)
			fsai->valid |= FATTR_ATIME_NOW;
	}
	if (vap->va_mtime.tv_sec != VNOVAL) {
		fsai->mtime = vap->va_mtime.tv_sec;
		fsai->mtimensec = vap->va_mtime.tv_nsec;
		fsai->valid |= FATTR_MTIME;
		if (vap->va_vaflags & VA_UTIMES_NULL)
			fsai->valid |= FATTR_MTIME_NOW;
	} else if (fvdat->flag & FN_MTIMECHANGE) {
		/* Flush the locally dirtied mtime. */
		fsai->mtime = fvdat->cached_attrs.va_mtime.tv_sec;
		fsai->mtimensec = fvdat->cached_attrs.va_mtime.tv_nsec;
		fsai->valid |= FATTR_MTIME;
	}
	if (fuse_libabi_geq(data, 7, 23) && fvdat->flag & FN_CTIMECHANGE) {
		/* Flush the locally dirtied ctime (protocol >= 7.23 only). */
		fsai->ctime = fvdat->cached_attrs.va_ctime.tv_sec;
		fsai->ctimensec = fvdat->cached_attrs.va_ctime.tv_nsec;
		fsai->valid |= FATTR_CTIME;
	}
	if (vap->va_mode != (mode_t)VNOVAL) {
		fsai->mode = vap->va_mode & ALLPERMS;
		fsai->valid |= FATTR_MODE;
	}
	/* Nothing to change: skip the round trip to the server. */
	if (!fsai->valid) {
		goto out;
	}

	if ((err = fdisp_wait_answ(&fdi)))
		goto out;
	vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode);

	if (vnode_vtype(vp) != vtyp) {
		if (vnode_vtype(vp) == VNON && vtyp != VNON) {
			SDT_PROBE2(fusefs, , internal, trace, 1, "FUSE: Dang! "
				"vnode_vtype is VNON and vtype isn't.");
		} else {
			/*
			 * STALE vnode, ditch
			 *
			 * The vnode has changed its type "behind our back".
			 * There's nothing really we can do, so let us just
			 * force an internal revocation and tell the caller to
			 * try again, if interested.
			 */
			fuse_internal_vnode_disappear(vp);
			err = EAGAIN;
		}
	}
	if (err == 0) {
		struct fuse_attr_out *fao = (struct fuse_attr_out*)fdi.answ;

		/* The server now has our timestamps; cache its answer. */
		fuse_vnode_undirty_cached_timestamps(vp);
		fuse_internal_cache_attrs(vp, &fao->attr, fao->attr_valid,
			fao->attr_valid_nsec, NULL);
	}

out:
	fdisp_destroy(&fdi);
	return err;
}
#ifdef ZERO_PAD_INCOMPLETE_BUFS
/*
 * Return 1 if the first 'len' bytes of 'buf' are all zero (vacuously so
 * when 'len' is 0), otherwise 0.
 */
static int
isbzero(void *buf, size_t len)
{
	const unsigned char *bytes = buf;
	size_t i;	/* same type as len: no signed/unsigned comparison */

	for (i = 0; i < len; i++) {
		if (bytes[i] != 0)
			return (0);
	}

	return (1);
}
#endif
/*
 * Allocate the lookup cache hit/miss counters exported through sysctl.
 * Both allocations use M_WAITOK.
 */
void
fuse_internal_init(void)
{
	fuse_lookup_cache_hits = counter_u64_alloc(M_WAITOK);
	fuse_lookup_cache_misses = counter_u64_alloc(M_WAITOK);
}
/*
 * Release the lookup cache counters allocated by fuse_internal_init().
 */
void
fuse_internal_destroy(void)
{
	counter_u64_free(fuse_lookup_cache_misses);
	counter_u64_free(fuse_lookup_cache_hits);
}