FUSE: Respect userspace FS "do-not-cache" of file attributes
The FUSE protocol demands that kernel implementations cache user filesystem file attributes (vattr data) for a maximum period of time in the range of [0, ULONG_MAX] seconds. In practice, typical requests are for 0, 1, or 10 seconds; or "a long time" to represent indefinite caching. Historically, FreeBSD FUSE has ignored this client directive entirely. This works fine for local-only filesystems, but causes consistency issues with multi-writer network filesystems. For now, respect 0 second cache TTLs and do not cache such metadata. Non-zero metadata caching TTLs in the range [0.000000001, ULONG_MAX] seconds are still cached indefinitely, because it is unclear how a userspace filesystem could do anything sensible with those semantics even if implemented. In the future, as an optimization, we should implement notify_inval_entry, etc, which provide userspace filesystems a way of evicting the kernel cache. One potentially bogus access to invalid cached attribute data was left in fuse_io_strategy. It is restricted behind the undocumented and non-default "vfs.fuse.fix_broken_io" sysctl or "brokenio" mount option; maybe these are deadcode and can be eliminated? Some minor APIs changed to facilitate this: 1. Attribute cache validity is tracked in FUSE inodes ("fuse_vnode_data"). 2. cache_attrs() respects the provided TTL and only caches in the FUSE inode if TTL > 0. It also grows an "out" argument, which, if non-NULL, stores the translated fuse_attr (even if not suitable for caching). 3. FUSE VTOVA(vp) returns NULL if the vnode's cache is invalid, to help avoid programming mistakes. 4. A VOP_LINK check for potential nlink overflow prior to invoking the FUSE link op was weakened (only performed when we have a valid attr cache). The check is racy in a multi-writer network filesystem anyway -- classic TOCTOU. We have to trust any userspace filesystem that rejects local caching to account for it correctly. PR: 230258 (inspired by; does not fix)
This commit is contained in:
parent
447b492eb8
commit
78a7722fbc
@ -373,7 +373,6 @@ fuse_internal_readdir_processdata(struct uio *uio,
|
||||
|
||||
/* remove */
|
||||
|
||||
#define INVALIDATE_CACHED_VATTRS_UPON_UNLINK 1
|
||||
int
|
||||
fuse_internal_remove(struct vnode *dvp,
|
||||
struct vnode *vp,
|
||||
@ -381,15 +380,11 @@ fuse_internal_remove(struct vnode *dvp,
|
||||
enum fuse_opcode op)
|
||||
{
|
||||
struct fuse_dispatcher fdi;
|
||||
struct fuse_vnode_data *fvdat;
|
||||
int err;
|
||||
|
||||
struct vattr *vap = VTOVA(vp);
|
||||
|
||||
#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK
|
||||
int need_invalidate = 0;
|
||||
uint64_t target_nlink = 0;
|
||||
|
||||
#endif
|
||||
int err = 0;
|
||||
err = 0;
|
||||
fvdat = VTOFUD(vp);
|
||||
|
||||
debug_printf("dvp=%p, cnp=%p, op=%d\n", vp, cnp, op);
|
||||
|
||||
@ -399,13 +394,6 @@ fuse_internal_remove(struct vnode *dvp,
|
||||
memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen);
|
||||
((char *)fdi.indata)[cnp->cn_namelen] = '\0';
|
||||
|
||||
#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK
|
||||
if (vap->va_nlink > 1) {
|
||||
need_invalidate = 1;
|
||||
target_nlink = vap->va_nlink;
|
||||
}
|
||||
#endif
|
||||
|
||||
err = fdisp_wait_answ(&fdi);
|
||||
fdisp_destroy(&fdi);
|
||||
return err;
|
||||
@ -489,7 +477,7 @@ fuse_internal_newentry_core(struct vnode *dvp,
|
||||
feo->nodeid, 1);
|
||||
return err;
|
||||
}
|
||||
cache_attrs(*vpp, feo);
|
||||
cache_attrs(*vpp, feo, NULL);
|
||||
|
||||
return err;
|
||||
}
|
||||
@ -563,6 +551,7 @@ fuse_internal_vnode_disappear(struct vnode *vp)
|
||||
|
||||
ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear");
|
||||
fvdat->flag |= FN_REVOKED;
|
||||
fvdat->valid_attr_cache = false;
|
||||
cache_purge(vp);
|
||||
}
|
||||
|
||||
|
@ -200,15 +200,47 @@ fuse_internal_access(struct vnode *vp,
|
||||
|
||||
/* attributes */
|
||||
|
||||
/*
|
||||
* Cache FUSE attributes 'fat', with nominal expiration
|
||||
* 'attr_valid'.'attr_valid_nsec', in attr cache associated with vnode 'vp'.
|
||||
* Optionally, if argument 'vap' is not NULL, store a copy of the converted
|
||||
* attributes there as well.
|
||||
*
|
||||
* If the nominal attribute cache TTL is zero, do not cache on the 'vp' (but do
|
||||
* return the result to the caller).
|
||||
*/
|
||||
static __inline
|
||||
void
|
||||
fuse_internal_attr_fat2vat(struct mount *mp,
|
||||
fuse_internal_attr_fat2vat(struct vnode *vp,
|
||||
struct fuse_attr *fat,
|
||||
uint64_t attr_valid,
|
||||
uint32_t attr_valid_nsec,
|
||||
struct vattr *vap)
|
||||
{
|
||||
struct mount *mp;
|
||||
struct fuse_vnode_data *fvdat;
|
||||
struct vattr *vp_cache_at;
|
||||
|
||||
mp = vnode_mount(vp);
|
||||
fvdat = VTOFUD(vp);
|
||||
|
||||
DEBUGX(FUSE_DEBUG_INTERNAL,
|
||||
"node #%ju, mode 0%o\n", (uintmax_t)fat->ino, fat->mode);
|
||||
|
||||
/* Honor explicit do-not-cache requests from user filesystems. */
|
||||
if (attr_valid == 0 && attr_valid_nsec == 0)
|
||||
fvdat->valid_attr_cache = false;
|
||||
else
|
||||
fvdat->valid_attr_cache = true;
|
||||
|
||||
vp_cache_at = VTOVA(vp);
|
||||
|
||||
if (vap == NULL && vp_cache_at == NULL)
|
||||
return;
|
||||
|
||||
if (vap == NULL)
|
||||
vap = vp_cache_at;
|
||||
|
||||
vattr_null(vap);
|
||||
|
||||
vap->va_fsid = mp->mnt_stat.f_fsid.val[0];
|
||||
@ -227,21 +259,17 @@ fuse_internal_attr_fat2vat(struct mount *mp,
|
||||
vap->va_ctime.tv_nsec = fat->ctimensec;
|
||||
vap->va_blocksize = PAGE_SIZE;
|
||||
vap->va_type = IFTOVT(fat->mode);
|
||||
|
||||
#if (S_BLKSIZE == 512)
|
||||
/* Optimize this case */
|
||||
vap->va_bytes = fat->blocks << 9;
|
||||
#else
|
||||
vap->va_bytes = fat->blocks * S_BLKSIZE;
|
||||
#endif
|
||||
|
||||
vap->va_flags = 0;
|
||||
|
||||
if (vap != vp_cache_at && vp_cache_at != NULL)
|
||||
memcpy(vp_cache_at, vap, sizeof(*vap));
|
||||
}
|
||||
|
||||
|
||||
#define cache_attrs(vp, fuse_out) \
|
||||
fuse_internal_attr_fat2vat(vnode_mount(vp), &(fuse_out)->attr, \
|
||||
VTOVA(vp));
|
||||
#define cache_attrs(vp, fuse_out, vap_out) \
|
||||
fuse_internal_attr_fat2vat((vp), &(fuse_out)->attr, \
|
||||
(fuse_out)->attr_valid, (fuse_out)->attr_valid_nsec, (vap_out))
|
||||
|
||||
/* fsync */
|
||||
|
||||
|
@ -655,6 +655,7 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
|
||||
uiop->uio_offset = ((off_t)bp->b_blkno) * biosize;
|
||||
error = fuse_read_directbackend(vp, uiop, cred, fufh);
|
||||
|
||||
/* XXXCEM: Potentially invalid access to cached_attrs here */
|
||||
if ((!error && uiop->uio_resid) ||
|
||||
(fsess_opt_brokenio(vnode_mount(vp)) && error == EIO &&
|
||||
uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 &&
|
||||
|
@ -147,6 +147,7 @@ fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat,
|
||||
int i;
|
||||
|
||||
fvdat->nid = nodeid;
|
||||
vattr_null(&fvdat->cached_attrs);
|
||||
if (nodeid == FUSE_ROOT_ID) {
|
||||
vp->v_vflag |= VV_ROOT;
|
||||
}
|
||||
|
@ -86,6 +86,7 @@ struct fuse_vnode_data {
|
||||
uint32_t flag;
|
||||
|
||||
/** meta **/
|
||||
bool valid_attr_cache;
|
||||
struct vattr cached_attrs;
|
||||
off_t filesize;
|
||||
uint64_t nlookup;
|
||||
@ -95,7 +96,9 @@ struct fuse_vnode_data {
|
||||
#define VTOFUD(vp) \
|
||||
((struct fuse_vnode_data *)((vp)->v_data))
|
||||
#define VTOI(vp) (VTOFUD(vp)->nid)
|
||||
#define VTOVA(vp) (&(VTOFUD(vp)->cached_attrs))
|
||||
#define VTOVA(vp) \
|
||||
(VTOFUD(vp)->valid_attr_cache ? \
|
||||
&(VTOFUD(vp)->cached_attrs) : NULL)
|
||||
#define VTOILLU(vp) ((uint64_t)(VTOFUD(vp) ? VTOI(vp) : 0))
|
||||
|
||||
#define FUSE_NULL_ID 0
|
||||
|
@ -518,10 +518,8 @@ fuse_vnop_getattr(struct vop_getattr_args *ap)
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
cache_attrs(vp, (struct fuse_attr_out *)fdi.answ);
|
||||
if (vap != VTOVA(vp)) {
|
||||
memcpy(vap, VTOVA(vp), sizeof(*vap));
|
||||
}
|
||||
|
||||
cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, vap);
|
||||
if (vap->va_type != vnode_vtype(vp)) {
|
||||
fuse_internal_vnode_disappear(vp);
|
||||
err = ENOENT;
|
||||
@ -628,9 +626,15 @@ fuse_vnop_link(struct vop_link_args *ap)
|
||||
if (vnode_mount(tdvp) != vnode_mount(vp)) {
|
||||
return EXDEV;
|
||||
}
|
||||
if (vap->va_nlink >= FUSE_LINK_MAX) {
|
||||
|
||||
/*
|
||||
* This is a seatbelt check to protect naive userspace filesystems from
|
||||
* themselves and the limitations of the FUSE IPC protocol. If a
|
||||
* filesystem does not allow attribute caching, assume it is capable of
|
||||
* validating that nlink does not overflow.
|
||||
*/
|
||||
if (vap != NULL && vap->va_nlink >= FUSE_LINK_MAX)
|
||||
return EMLINK;
|
||||
}
|
||||
fli.oldnodeid = VTOI(vp);
|
||||
|
||||
fdisp_init(&fdi, 0);
|
||||
@ -966,9 +970,11 @@ fuse_vnop_lookup(struct vop_lookup_args *ap)
|
||||
}
|
||||
|
||||
if (op == FUSE_GETATTR) {
|
||||
cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ);
|
||||
cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ,
|
||||
NULL);
|
||||
} else {
|
||||
cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ);
|
||||
cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ,
|
||||
NULL);
|
||||
}
|
||||
|
||||
/* Insert name into cache if appropriate. */
|
||||
@ -1644,7 +1650,7 @@ fuse_vnop_setattr(struct vop_setattr_args *ap)
|
||||
}
|
||||
}
|
||||
if (!err && !sizechanged) {
|
||||
cache_attrs(vp, (struct fuse_attr_out *)fdi.answ);
|
||||
cache_attrs(vp, (struct fuse_attr_out *)fdi.answ, NULL);
|
||||
}
|
||||
out:
|
||||
fdisp_destroy(&fdi);
|
||||
|
Loading…
Reference in New Issue
Block a user