fusefs: rewrite vop_getpages and vop_putpages
Use the standard facilities for getpages and putpages instead of bespoke implementations that don't work well with the writeback cache. This has several corollaries: * Change the way we handle short reads _again_. vfs_bio_getpages doesn't provide any way to handle unexpected short reads. Plus, I found some more lock-order problems. So now when the short read is detected we'll just clear the vnode's attribute cache, forcing the file size to be requeried the next time it's needed. VOP_GETPAGES doesn't have any way to indicate a short read to the "caller", so we just bzero the rest of the page whenever a short read happens. * Change the way we decide when to set the FUSE_WRITE_CACHE bit. We now set it for clustered writes even when the writeback cache is not in use. Sponsored by: The FreeBSD Foundation
This commit is contained in:
parent
48417ae0ba
commit
b9e2019755
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/projects/fuse2/; revision=349378
@ -100,6 +100,12 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include "fuse_ipc.h"
|
#include "fuse_ipc.h"
|
||||||
#include "fuse_io.h"
|
#include "fuse_io.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set in a struct buf to indicate that the write came from the buffer cache
|
||||||
|
* and the originating cred and pid are no longer known.
|
||||||
|
*/
|
||||||
|
#define B_FUSEFS_WRITE_CACHE B_FS_FLAG1
|
||||||
|
|
||||||
SDT_PROVIDER_DECLARE(fusefs);
|
SDT_PROVIDER_DECLARE(fusefs);
|
||||||
/*
|
/*
|
||||||
* Fuse trace probe:
|
* Fuse trace probe:
|
||||||
@ -164,10 +170,10 @@ fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
|
|||||||
|
|
||||||
SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*",
|
SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*",
|
||||||
"int", "struct ucred*", "struct fuse_filehandle*");
|
"int", "struct ucred*", "struct fuse_filehandle*");
|
||||||
SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*",
|
SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*",
|
||||||
"struct uio*", "int", "bool", "struct ucred*");
|
"struct uio*", "int", "struct ucred*");
|
||||||
int
|
int
|
||||||
fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
|
fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
|
||||||
struct ucred *cred, pid_t pid)
|
struct ucred *cred, pid_t pid)
|
||||||
{
|
{
|
||||||
struct fuse_filehandle *fufh;
|
struct fuse_filehandle *fufh;
|
||||||
@ -188,8 +194,8 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
|
|||||||
closefufh = true;
|
closefufh = true;
|
||||||
}
|
}
|
||||||
else if (err) {
|
else if (err) {
|
||||||
SDT_PROBE5(fusefs, , io, io_dispatch_filehandles_closed,
|
SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed,
|
||||||
vp, uio, ioflag, pages, cred);
|
vp, uio, ioflag, cred);
|
||||||
printf("FUSE: io dispatch: filehandles are closed\n");
|
printf("FUSE: io dispatch: filehandles are closed\n");
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
@ -236,15 +242,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
|
|||||||
|
|
||||||
start = uio->uio_offset;
|
start = uio->uio_offset;
|
||||||
end = start + uio->uio_resid;
|
end = start + uio->uio_resid;
|
||||||
/*
|
KASSERT((ioflag & (IO_VMIO | IO_DIRECT)) !=
|
||||||
* Invalidate the write cache unless we're coming from
|
(IO_VMIO | IO_DIRECT),
|
||||||
* VOP_PUTPAGES, in which case we're writing _from_ the
|
("IO_DIRECT used for a cache flush?"));
|
||||||
* write cache
|
/* Invalidate the write cache when writing directly */
|
||||||
*/
|
v_inval_buf_range(vp, start, end, iosize);
|
||||||
if (!pages )
|
|
||||||
v_inval_buf_range(vp, start, end, iosize);
|
|
||||||
err = fuse_write_directbackend(vp, uio, cred, fufh,
|
err = fuse_write_directbackend(vp, uio, cred, fufh,
|
||||||
filesize, ioflag, pages);
|
filesize, ioflag, false);
|
||||||
} else {
|
} else {
|
||||||
SDT_PROBE2(fusefs, , io, trace, 1,
|
SDT_PROBE2(fusefs, , io, trace, 1,
|
||||||
"buffered write of vnode");
|
"buffered write of vnode");
|
||||||
@ -352,8 +356,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
n = 0;
|
n = 0;
|
||||||
if (on < bcount - (intptr_t)bp->b_fsprivate1)
|
if (on < bcount - bp->b_resid)
|
||||||
n = MIN((unsigned)(bcount - (intptr_t)bp->b_fsprivate1 - on),
|
n = MIN((unsigned)(bcount - bp->b_resid - on),
|
||||||
uio->uio_resid);
|
uio->uio_resid);
|
||||||
if (n > 0) {
|
if (n > 0) {
|
||||||
SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
|
SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
|
||||||
@ -362,10 +366,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
|
|||||||
vfs_bio_brelse(bp, ioflag);
|
vfs_bio_brelse(bp, ioflag);
|
||||||
SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
|
SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
|
||||||
uio->uio_resid, n, bp);
|
uio->uio_resid, n, bp);
|
||||||
if ((intptr_t)bp->b_fsprivate1 > 0) {
|
if (bp->b_resid > 0) {
|
||||||
/* Short read indicates EOF */
|
/* Short read indicates EOF */
|
||||||
(void)fuse_vnode_setsize(vp, uio->uio_offset);
|
|
||||||
bp->b_fsprivate1 = (void*)0;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -725,6 +727,21 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
|
|||||||
brelse(bp);
|
brelse(bp);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (bp->b_resid > 0) {
|
||||||
|
/*
|
||||||
|
* Short read indicates EOF. Update file size
|
||||||
|
* from the server and try again.
|
||||||
|
*/
|
||||||
|
SDT_PROBE2(fusefs, , io, trace, 1,
|
||||||
|
"Short read during a RMW");
|
||||||
|
brelse(bp);
|
||||||
|
err = fuse_vnode_size(vp, &filesize, cred,
|
||||||
|
curthread);
|
||||||
|
if (err)
|
||||||
|
break;
|
||||||
|
else
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (bp->b_wcred == NOCRED)
|
if (bp->b_wcred == NOCRED)
|
||||||
bp->b_wcred = crhold(cred);
|
bp->b_wcred = crhold(cred);
|
||||||
@ -805,8 +822,11 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
|
|||||||
|
|
||||||
vfs_bio_set_flags(bp, ioflag);
|
vfs_bio_set_flags(bp, ioflag);
|
||||||
|
|
||||||
|
bp->b_flags |= B_FUSEFS_WRITE_CACHE;
|
||||||
if (ioflag & IO_SYNC) {
|
if (ioflag & IO_SYNC) {
|
||||||
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp);
|
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp);
|
||||||
|
if (!(ioflag & IO_VMIO))
|
||||||
|
bp->b_flags &= ~B_FUSEFS_WRITE_CACHE;
|
||||||
err = bwrite(bp);
|
err = bwrite(bp);
|
||||||
} else if (vm_page_count_severe() ||
|
} else if (vm_page_count_severe() ||
|
||||||
buf_dirty_count_severe() ||
|
buf_dirty_count_severe() ||
|
||||||
@ -864,7 +884,6 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
|
|||||||
fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE;
|
fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE;
|
||||||
cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred;
|
cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred;
|
||||||
error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
|
error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
|
||||||
bp->b_fsprivate1 = (void*)(intptr_t)0;
|
|
||||||
if (bp->b_iocmd == BIO_READ && error == EBADF) {
|
if (bp->b_iocmd == BIO_READ && error == EBADF) {
|
||||||
/*
|
/*
|
||||||
* This may be a read-modify-write operation on a cached file
|
* This may be a read-modify-write operation on a cached file
|
||||||
@ -905,39 +924,40 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
|
|||||||
|
|
||||||
uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize;
|
uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize;
|
||||||
error = fuse_read_directbackend(vp, uiop, cred, fufh);
|
error = fuse_read_directbackend(vp, uiop, cred, fufh);
|
||||||
left = uiop->uio_resid;
|
|
||||||
/*
|
/*
|
||||||
* Store the amount we failed to read in the buffer's private
|
* Store the amount we failed to read in the buffer's private
|
||||||
* field, so callers can truncate the file if necessary'
|
* field, so callers can truncate the file if necessary'
|
||||||
*/
|
*/
|
||||||
bp->b_fsprivate1 = (void*)(intptr_t)left;
|
|
||||||
|
|
||||||
if (!error && uiop->uio_resid) {
|
if (!error && uiop->uio_resid) {
|
||||||
int nread = bp->b_bcount - uiop->uio_resid;
|
int nread = bp->b_bcount - uiop->uio_resid;
|
||||||
|
left = uiop->uio_resid;
|
||||||
bzero((char *)bp->b_data + nread, left);
|
bzero((char *)bp->b_data + nread, left);
|
||||||
|
|
||||||
if (fuse_data_cache_mode != FUSE_CACHE_WB ||
|
if ((fvdat->flag & FN_SIZECHANGE) == 0) {
|
||||||
(fvdat->flag & FN_SIZECHANGE) == 0) {
|
|
||||||
/*
|
/*
|
||||||
* A short read with no error, when not using
|
* A short read with no error, when not using
|
||||||
* direct io, and when no writes are cached,
|
* direct io, and when no writes are cached,
|
||||||
* indicates EOF. Update the file size
|
* indicates EOF caused by a server-side
|
||||||
* accordingly. We must still bzero the
|
* truncation. Clear the attr cache so we'll
|
||||||
* remaining buffer so uninitialized data
|
* pick up the new file size and timestamps.
|
||||||
* doesn't get exposed by a future truncate
|
*
|
||||||
* that extends the file.
|
* We must still bzero the remaining buffer so
|
||||||
|
* uninitialized data doesn't get exposed by a
|
||||||
|
* future truncate that extends the file.
|
||||||
*
|
*
|
||||||
* To prevent lock order problems, we must
|
* To prevent lock order problems, we must
|
||||||
* truncate the file upstack
|
* truncate the file upstack, not here.
|
||||||
*/
|
*/
|
||||||
SDT_PROBE2(fusefs, , io, trace, 1,
|
SDT_PROBE2(fusefs, , io, trace, 1,
|
||||||
"Short read of a clean file");
|
"Short read of a clean file");
|
||||||
uiop->uio_resid = 0;
|
fuse_vnode_clear_attr_cache(vp);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* If dirty writes _are_ cached beyond EOF,
|
* If dirty writes _are_ cached beyond EOF,
|
||||||
* that indicates a newly created hole that the
|
* that indicates a newly created hole that the
|
||||||
* server doesn't know about.
|
* server doesn't know about. Those don't pose
|
||||||
|
* any problem.
|
||||||
* XXX: we don't currently track whether dirty
|
* XXX: we don't currently track whether dirty
|
||||||
* writes are cached beyond EOF, before EOF, or
|
* writes are cached beyond EOF, before EOF, or
|
||||||
* both.
|
* both.
|
||||||
@ -976,8 +996,9 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
|
|||||||
io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
|
io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
|
||||||
uiop->uio_rw = UIO_WRITE;
|
uiop->uio_rw = UIO_WRITE;
|
||||||
|
|
||||||
|
bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE;
|
||||||
error = fuse_write_directbackend(vp, uiop, cred, fufh,
|
error = fuse_write_directbackend(vp, uiop, cred, fufh,
|
||||||
filesize, 0, false);
|
filesize, 0, pages);
|
||||||
|
|
||||||
if (error == EINTR || error == ETIMEDOUT) {
|
if (error == EINTR || error == ETIMEDOUT) {
|
||||||
bp->b_flags &= ~(B_INVAL | B_NOCACHE);
|
bp->b_flags &= ~(B_INVAL | B_NOCACHE);
|
||||||
|
@ -60,7 +60,7 @@
|
|||||||
#ifndef _FUSE_IO_H_
|
#ifndef _FUSE_IO_H_
|
||||||
#define _FUSE_IO_H_
|
#define _FUSE_IO_H_
|
||||||
|
|
||||||
int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
|
int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
|
||||||
struct ucred *cred, pid_t pid);
|
struct ucred *cred, pid_t pid);
|
||||||
int fuse_io_strategy(struct vnode *vp, struct buf *bp);
|
int fuse_io_strategy(struct vnode *vp, struct buf *bp);
|
||||||
int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td);
|
int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td);
|
||||||
|
@ -150,7 +150,6 @@ static vop_strategy_t fuse_vnop_strategy;
|
|||||||
static vop_symlink_t fuse_vnop_symlink;
|
static vop_symlink_t fuse_vnop_symlink;
|
||||||
static vop_write_t fuse_vnop_write;
|
static vop_write_t fuse_vnop_write;
|
||||||
static vop_getpages_t fuse_vnop_getpages;
|
static vop_getpages_t fuse_vnop_getpages;
|
||||||
static vop_putpages_t fuse_vnop_putpages;
|
|
||||||
static vop_print_t fuse_vnop_print;
|
static vop_print_t fuse_vnop_print;
|
||||||
static vop_vptofh_t fuse_vnop_vptofh;
|
static vop_vptofh_t fuse_vnop_vptofh;
|
||||||
|
|
||||||
@ -215,7 +214,6 @@ struct vop_vector fuse_vnops = {
|
|||||||
.vop_symlink = fuse_vnop_symlink,
|
.vop_symlink = fuse_vnop_symlink,
|
||||||
.vop_write = fuse_vnop_write,
|
.vop_write = fuse_vnop_write,
|
||||||
.vop_getpages = fuse_vnop_getpages,
|
.vop_getpages = fuse_vnop_getpages,
|
||||||
.vop_putpages = fuse_vnop_putpages,
|
|
||||||
.vop_print = fuse_vnop_print,
|
.vop_print = fuse_vnop_print,
|
||||||
.vop_vptofh = fuse_vnop_vptofh,
|
.vop_vptofh = fuse_vnop_vptofh,
|
||||||
};
|
};
|
||||||
@ -1381,7 +1379,7 @@ fuse_vnop_read(struct vop_read_args *ap)
|
|||||||
ioflag |= IO_DIRECT;
|
ioflag |= IO_DIRECT;
|
||||||
}
|
}
|
||||||
|
|
||||||
return fuse_io_dispatch(vp, uio, ioflag, false, cred, pid);
|
return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1960,10 +1958,39 @@ fuse_vnop_write(struct vop_write_args *ap)
|
|||||||
ioflag |= IO_DIRECT;
|
ioflag |= IO_DIRECT;
|
||||||
}
|
}
|
||||||
|
|
||||||
return fuse_io_dispatch(vp, uio, ioflag, false, cred, pid);
|
return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
|
||||||
|
}
|
||||||
|
|
||||||
|
static daddr_t
|
||||||
|
fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
|
||||||
|
{
|
||||||
|
const int biosize = fuse_iosize(vp);
|
||||||
|
|
||||||
|
return (off / biosize);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn)
|
||||||
|
{
|
||||||
|
off_t filesize;
|
||||||
|
int blksz, err;
|
||||||
|
const int biosize = fuse_iosize(vp);
|
||||||
|
|
||||||
|
err = fuse_vnode_size(vp, &filesize, NULL, NULL);
|
||||||
|
KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here"));
|
||||||
|
if (err)
|
||||||
|
return biosize;
|
||||||
|
|
||||||
|
if ((off_t)lbn * biosize >= filesize) {
|
||||||
|
blksz = 0;
|
||||||
|
} else if ((off_t)(lbn + 1) * biosize > filesize) {
|
||||||
|
blksz = filesize - (off_t)lbn *biosize;
|
||||||
|
} else {
|
||||||
|
blksz = biosize;
|
||||||
|
}
|
||||||
|
return (blksz);
|
||||||
}
|
}
|
||||||
|
|
||||||
SDT_PROBE_DEFINE1(fusefs, , vnops, vnop_getpages_error, "int");
|
|
||||||
/*
|
/*
|
||||||
struct vnop_getpages_args {
|
struct vnop_getpages_args {
|
||||||
struct vnode *a_vp;
|
struct vnode *a_vp;
|
||||||
@ -1975,23 +2002,7 @@ SDT_PROBE_DEFINE1(fusefs, , vnops, vnop_getpages_error, "int");
|
|||||||
static int
|
static int
|
||||||
fuse_vnop_getpages(struct vop_getpages_args *ap)
|
fuse_vnop_getpages(struct vop_getpages_args *ap)
|
||||||
{
|
{
|
||||||
int i, error, nextoff, size, toff, count, npages;
|
struct vnode *vp = ap->a_vp;
|
||||||
struct uio uio;
|
|
||||||
struct iovec iov;
|
|
||||||
vm_offset_t kva;
|
|
||||||
struct buf *bp;
|
|
||||||
struct vnode *vp;
|
|
||||||
struct thread *td;
|
|
||||||
struct ucred *cred;
|
|
||||||
vm_page_t *pages;
|
|
||||||
pid_t pid = curthread->td_proc->p_pid;
|
|
||||||
|
|
||||||
vp = ap->a_vp;
|
|
||||||
KASSERT(vp->v_object, ("objectless vp passed to getpages"));
|
|
||||||
td = curthread; /* XXX */
|
|
||||||
cred = curthread->td_ucred; /* XXX */
|
|
||||||
pages = ap->a_m;
|
|
||||||
npages = ap->a_count;
|
|
||||||
|
|
||||||
if (!fsess_opt_mmap(vnode_mount(vp))) {
|
if (!fsess_opt_mmap(vnode_mount(vp))) {
|
||||||
SDT_PROBE2(fusefs, , vnops, trace, 1,
|
SDT_PROBE2(fusefs, , vnops, trace, 1,
|
||||||
@ -1999,190 +2010,8 @@ fuse_vnop_getpages(struct vop_getpages_args *ap)
|
|||||||
return (VM_PAGER_ERROR);
|
return (VM_PAGER_ERROR);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
|
||||||
* If the last page is partially valid, just return it and allow
|
ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
|
||||||
* the pager to zero-out the blanks. Partially valid pages can
|
|
||||||
* only occur at the file EOF.
|
|
||||||
*
|
|
||||||
* XXXGL: is that true for FUSE, which is a local filesystem,
|
|
||||||
* but still somewhat disconnected from the kernel?
|
|
||||||
*/
|
|
||||||
VM_OBJECT_WLOCK(vp->v_object);
|
|
||||||
if (pages[npages - 1]->valid != 0 && --npages == 0)
|
|
||||||
goto out;
|
|
||||||
VM_OBJECT_WUNLOCK(vp->v_object);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We use only the kva address for the buffer, but this is extremely
|
|
||||||
* convenient and fast.
|
|
||||||
*/
|
|
||||||
bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
|
|
||||||
|
|
||||||
kva = (vm_offset_t)bp->b_data;
|
|
||||||
pmap_qenter(kva, pages, npages);
|
|
||||||
VM_CNT_INC(v_vnodein);
|
|
||||||
VM_CNT_ADD(v_vnodepgsin, npages);
|
|
||||||
|
|
||||||
count = npages << PAGE_SHIFT;
|
|
||||||
iov.iov_base = (caddr_t)kva;
|
|
||||||
iov.iov_len = count;
|
|
||||||
uio.uio_iov = &iov;
|
|
||||||
uio.uio_iovcnt = 1;
|
|
||||||
uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
|
|
||||||
uio.uio_resid = count;
|
|
||||||
uio.uio_segflg = UIO_SYSSPACE;
|
|
||||||
uio.uio_rw = UIO_READ;
|
|
||||||
uio.uio_td = td;
|
|
||||||
|
|
||||||
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, true, cred, pid);
|
|
||||||
pmap_qremove(kva, npages);
|
|
||||||
|
|
||||||
uma_zfree(fuse_pbuf_zone, bp);
|
|
||||||
|
|
||||||
if (error && (uio.uio_resid == count)) {
|
|
||||||
SDT_PROBE1(fusefs, , vnops, vnop_getpages_error, error);
|
|
||||||
return VM_PAGER_ERROR;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* Calculate the number of bytes read and validate only that number
|
|
||||||
* of bytes. Note that due to pending writes, size may be 0. This
|
|
||||||
* does not mean that the remaining data is invalid!
|
|
||||||
*/
|
|
||||||
|
|
||||||
size = count - uio.uio_resid;
|
|
||||||
VM_OBJECT_WLOCK(vp->v_object);
|
|
||||||
fuse_vm_page_lock_queues();
|
|
||||||
for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
|
|
||||||
vm_page_t m;
|
|
||||||
|
|
||||||
nextoff = toff + PAGE_SIZE;
|
|
||||||
m = pages[i];
|
|
||||||
|
|
||||||
if (nextoff <= size) {
|
|
||||||
/*
|
|
||||||
* Read operation filled an entire page
|
|
||||||
*/
|
|
||||||
m->valid = VM_PAGE_BITS_ALL;
|
|
||||||
KASSERT(m->dirty == 0,
|
|
||||||
("fuse_getpages: page %p is dirty", m));
|
|
||||||
} else if (size > toff) {
|
|
||||||
/*
|
|
||||||
* Read operation filled a partial page.
|
|
||||||
*/
|
|
||||||
m->valid = 0;
|
|
||||||
vm_page_set_valid_range(m, 0, size - toff);
|
|
||||||
KASSERT(m->dirty == 0,
|
|
||||||
("fuse_getpages: page %p is dirty", m));
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* Read operation was short. If no error occurred
|
|
||||||
* we may have hit a zero-fill section. We simply
|
|
||||||
* leave valid set to 0.
|
|
||||||
*/
|
|
||||||
;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
fuse_vm_page_unlock_queues();
|
|
||||||
out:
|
|
||||||
VM_OBJECT_WUNLOCK(vp->v_object);
|
|
||||||
if (ap->a_rbehind)
|
|
||||||
*ap->a_rbehind = 0;
|
|
||||||
if (ap->a_rahead)
|
|
||||||
*ap->a_rahead = 0;
|
|
||||||
return (VM_PAGER_OK);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
struct vnop_putpages_args {
|
|
||||||
struct vnode *a_vp;
|
|
||||||
vm_page_t *a_m;
|
|
||||||
int a_count;
|
|
||||||
int a_sync;
|
|
||||||
int *a_rtvals;
|
|
||||||
vm_ooffset_t a_offset;
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
fuse_vnop_putpages(struct vop_putpages_args *ap)
|
|
||||||
{
|
|
||||||
struct uio uio;
|
|
||||||
struct iovec iov;
|
|
||||||
vm_offset_t kva;
|
|
||||||
struct buf *bp;
|
|
||||||
int i, error, npages, count;
|
|
||||||
off_t offset;
|
|
||||||
int *rtvals;
|
|
||||||
struct vnode *vp;
|
|
||||||
struct thread *td;
|
|
||||||
struct ucred *cred;
|
|
||||||
vm_page_t *pages;
|
|
||||||
vm_ooffset_t fsize;
|
|
||||||
pid_t pid = curthread->td_proc->p_pid;
|
|
||||||
|
|
||||||
vp = ap->a_vp;
|
|
||||||
KASSERT(vp->v_object, ("objectless vp passed to putpages"));
|
|
||||||
fsize = vp->v_object->un_pager.vnp.vnp_size;
|
|
||||||
td = curthread; /* XXX */
|
|
||||||
cred = curthread->td_ucred; /* XXX */
|
|
||||||
pages = ap->a_m;
|
|
||||||
count = ap->a_count;
|
|
||||||
rtvals = ap->a_rtvals;
|
|
||||||
npages = btoc(count);
|
|
||||||
offset = IDX_TO_OFF(pages[0]->pindex);
|
|
||||||
|
|
||||||
if (!fsess_opt_mmap(vnode_mount(vp))) {
|
|
||||||
SDT_PROBE2(fusefs, , vnops, trace, 1,
|
|
||||||
"called on non-cacheable vnode??\n");
|
|
||||||
}
|
|
||||||
for (i = 0; i < npages; i++)
|
|
||||||
rtvals[i] = VM_PAGER_AGAIN;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* When putting pages, do not extend file past EOF.
|
|
||||||
*/
|
|
||||||
|
|
||||||
if (offset + count > fsize) {
|
|
||||||
count = fsize - offset;
|
|
||||||
if (count < 0)
|
|
||||||
count = 0;
|
|
||||||
}
|
|
||||||
/*
|
|
||||||
* We use only the kva address for the buffer, but this is extremely
|
|
||||||
* convenient and fast.
|
|
||||||
*/
|
|
||||||
bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
|
|
||||||
|
|
||||||
kva = (vm_offset_t)bp->b_data;
|
|
||||||
pmap_qenter(kva, pages, npages);
|
|
||||||
VM_CNT_INC(v_vnodeout);
|
|
||||||
VM_CNT_ADD(v_vnodepgsout, count);
|
|
||||||
|
|
||||||
iov.iov_base = (caddr_t)kva;
|
|
||||||
iov.iov_len = count;
|
|
||||||
uio.uio_iov = &iov;
|
|
||||||
uio.uio_iovcnt = 1;
|
|
||||||
uio.uio_offset = offset;
|
|
||||||
uio.uio_resid = count;
|
|
||||||
uio.uio_segflg = UIO_SYSSPACE;
|
|
||||||
uio.uio_rw = UIO_WRITE;
|
|
||||||
uio.uio_td = td;
|
|
||||||
|
|
||||||
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, true, cred, pid);
|
|
||||||
|
|
||||||
pmap_qremove(kva, npages);
|
|
||||||
uma_zfree(fuse_pbuf_zone, bp);
|
|
||||||
|
|
||||||
if (!error) {
|
|
||||||
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
|
|
||||||
|
|
||||||
for (i = 0; i < nwritten; i++) {
|
|
||||||
rtvals[i] = VM_PAGER_OK;
|
|
||||||
VM_OBJECT_WLOCK(pages[i]->object);
|
|
||||||
vm_page_undirty(pages[i]);
|
|
||||||
VM_OBJECT_WUNLOCK(pages[i]->object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return rtvals[0];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static const char extattr_namespace_separator = '.';
|
static const char extattr_namespace_separator = '.';
|
||||||
|
@ -413,7 +413,8 @@ TEST_F(Read, eio)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* If the server returns a short read when direct io is not in use, that
|
* If the server returns a short read when direct io is not in use, that
|
||||||
* indicates EOF and we should update the file size.
|
* indicates EOF, because of a server-side truncation. We should invalidate
|
||||||
|
* all cached attributes. We may update the file size,
|
||||||
*/
|
*/
|
||||||
TEST_F(ReadCacheable, eof)
|
TEST_F(ReadCacheable, eof)
|
||||||
{
|
{
|
||||||
@ -425,18 +426,21 @@ TEST_F(ReadCacheable, eof)
|
|||||||
uint64_t offset = 100;
|
uint64_t offset = 100;
|
||||||
ssize_t bufsize = strlen(CONTENTS);
|
ssize_t bufsize = strlen(CONTENTS);
|
||||||
ssize_t partbufsize = 3 * bufsize / 4;
|
ssize_t partbufsize = 3 * bufsize / 4;
|
||||||
|
ssize_t r;
|
||||||
char buf[bufsize];
|
char buf[bufsize];
|
||||||
struct stat sb;
|
struct stat sb;
|
||||||
|
|
||||||
expect_lookup(RELPATH, ino, offset + bufsize);
|
expect_lookup(RELPATH, ino, offset + bufsize);
|
||||||
expect_open(ino, 0, 1);
|
expect_open(ino, 0, 1);
|
||||||
expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
|
expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
|
||||||
|
expect_getattr(ino, offset + partbufsize);
|
||||||
|
|
||||||
fd = open(FULLPATH, O_RDONLY);
|
fd = open(FULLPATH, O_RDONLY);
|
||||||
ASSERT_LE(0, fd) << strerror(errno);
|
ASSERT_LE(0, fd) << strerror(errno);
|
||||||
|
|
||||||
ASSERT_EQ(partbufsize, pread(fd, buf, bufsize, offset))
|
r = pread(fd, buf, bufsize, offset);
|
||||||
<< strerror(errno);
|
ASSERT_LE(0, r) << strerror(errno);
|
||||||
|
EXPECT_EQ(partbufsize, r) << strerror(errno);
|
||||||
ASSERT_EQ(0, fstat(fd, &sb));
|
ASSERT_EQ(0, fstat(fd, &sb));
|
||||||
EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
|
EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
|
||||||
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
||||||
@ -459,6 +463,7 @@ TEST_F(ReadCacheable, eof_of_whole_buffer)
|
|||||||
expect_open(ino, 0, 1);
|
expect_open(ino, 0, 1);
|
||||||
expect_read(ino, 2 * m_maxbcachebuf, bufsize, bufsize, CONTENTS);
|
expect_read(ino, 2 * m_maxbcachebuf, bufsize, bufsize, CONTENTS);
|
||||||
expect_read(ino, m_maxbcachebuf, m_maxbcachebuf, 0, CONTENTS);
|
expect_read(ino, m_maxbcachebuf, m_maxbcachebuf, 0, CONTENTS);
|
||||||
|
expect_getattr(ino, m_maxbcachebuf);
|
||||||
|
|
||||||
fd = open(FULLPATH, O_RDONLY);
|
fd = open(FULLPATH, O_RDONLY);
|
||||||
ASSERT_LE(0, fd) << strerror(errno);
|
ASSERT_LE(0, fd) << strerror(errno);
|
||||||
@ -586,6 +591,57 @@ TEST_F(ReadCacheable, mmap)
|
|||||||
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A read via mmap comes up short, indicating that the file was truncated
|
||||||
|
* server-side.
|
||||||
|
*/
|
||||||
|
TEST_F(ReadCacheable, mmap_eof)
|
||||||
|
{
|
||||||
|
const char FULLPATH[] = "mountpoint/some_file.txt";
|
||||||
|
const char RELPATH[] = "some_file.txt";
|
||||||
|
const char *CONTENTS = "abcdefgh";
|
||||||
|
uint64_t ino = 42;
|
||||||
|
int fd;
|
||||||
|
ssize_t len;
|
||||||
|
size_t bufsize = strlen(CONTENTS);
|
||||||
|
struct stat sb;
|
||||||
|
void *p;
|
||||||
|
|
||||||
|
len = getpagesize();
|
||||||
|
|
||||||
|
expect_lookup(RELPATH, ino, 100000);
|
||||||
|
expect_open(ino, 0, 1);
|
||||||
|
/* mmap may legitimately try to read more data than is available */
|
||||||
|
EXPECT_CALL(*m_mock, process(
|
||||||
|
ResultOf([=](auto in) {
|
||||||
|
return (in.header.opcode == FUSE_READ &&
|
||||||
|
in.header.nodeid == ino &&
|
||||||
|
in.body.read.fh == Read::FH &&
|
||||||
|
in.body.read.offset == 0 &&
|
||||||
|
in.body.read.size >= bufsize);
|
||||||
|
}, Eq(true)),
|
||||||
|
_)
|
||||||
|
).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
|
||||||
|
out.header.len = sizeof(struct fuse_out_header) + bufsize;
|
||||||
|
memmove(out.body.bytes, CONTENTS, bufsize);
|
||||||
|
})));
|
||||||
|
expect_getattr(ino, bufsize);
|
||||||
|
|
||||||
|
fd = open(FULLPATH, O_RDONLY);
|
||||||
|
ASSERT_LE(0, fd) << strerror(errno);
|
||||||
|
|
||||||
|
p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
|
||||||
|
ASSERT_NE(MAP_FAILED, p) << strerror(errno);
|
||||||
|
|
||||||
|
/* The file size should be automatically truncated */
|
||||||
|
ASSERT_EQ(0, memcmp(p, CONTENTS, bufsize));
|
||||||
|
ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
|
||||||
|
EXPECT_EQ((off_t)bufsize, sb.st_size);
|
||||||
|
|
||||||
|
ASSERT_EQ(0, munmap(p, len)) << strerror(errno);
|
||||||
|
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Just as when FOPEN_DIRECT_IO is used, reads with O_DIRECT should bypass
|
* Just as when FOPEN_DIRECT_IO is used, reads with O_DIRECT should bypass
|
||||||
* cache and to straight to the daemon
|
* cache and to straight to the daemon
|
||||||
|
@ -528,6 +528,39 @@ TEST_F(Write, rlimit_fsize)
|
|||||||
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* A short read indicates EOF. Test that nothing bad happens if we get EOF
|
||||||
|
* during the R of a RMW operation.
|
||||||
|
*/
|
||||||
|
TEST_F(WriteCacheable, eof_during_rmw)
|
||||||
|
{
|
||||||
|
const char FULLPATH[] = "mountpoint/some_file.txt";
|
||||||
|
const char RELPATH[] = "some_file.txt";
|
||||||
|
const char *CONTENTS = "abcdefgh";
|
||||||
|
const char *INITIAL = "XXXXXXXXXX";
|
||||||
|
uint64_t ino = 42;
|
||||||
|
uint64_t offset = 1;
|
||||||
|
ssize_t bufsize = strlen(CONTENTS);
|
||||||
|
off_t orig_fsize = 10;
|
||||||
|
off_t truncated_fsize = 5;
|
||||||
|
off_t final_fsize = bufsize;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, orig_fsize, 1);
|
||||||
|
expect_open(ino, 0, 1);
|
||||||
|
expect_read(ino, 0, orig_fsize, truncated_fsize, INITIAL, O_RDWR);
|
||||||
|
expect_getattr(ino, truncated_fsize);
|
||||||
|
expect_read(ino, 0, final_fsize, final_fsize, INITIAL, O_RDWR);
|
||||||
|
maybe_expect_write(ino, offset, bufsize, CONTENTS);
|
||||||
|
|
||||||
|
fd = open(FULLPATH, O_RDWR);
|
||||||
|
EXPECT_LE(0, fd) << strerror(errno);
|
||||||
|
|
||||||
|
ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
|
||||||
|
<< strerror(errno);
|
||||||
|
/* Deliberately leak fd. close(2) will be tested in release.cc */
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the kernel cannot be sure which uid, gid, or pid was responsible for a
|
* If the kernel cannot be sure which uid, gid, or pid was responsible for a
|
||||||
* write, then it must set the FUSE_WRITE_CACHE bit
|
* write, then it must set the FUSE_WRITE_CACHE bit
|
||||||
|
Loading…
Reference in New Issue
Block a user