fusefs: rewrite vop_getpages and vop_putpages

Use the standard facilities for getpages and putpages instead of bespoke
implementations that don't work well with the writeback cache.  This has
several corollaries:

* Change the way we handle short reads _again_.  vfs_bio_getpages doesn't
  provide any way to handle unexpected short reads.  Plus, I found some more
  lock-order problems.  So now, when a short read is detected, we just
  clear the vnode's attribute cache, forcing the file size to be requeried
  the next time it's needed.  VOP_GETPAGES doesn't have any way to indicate
  a short read to the "caller", so we just bzero the rest of the page
  whenever a short read happens.

* Change the way we decide when to set the FUSE_WRITE_CACHE bit.  We now set
  it for clustered writes even when the writeback cache is not in use.
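
In outline, the new getpages path is just a thin wrapper around
vfs_bio_getpages(9); the bespoke putpages implementation is removed outright,
so dirty pages now reach the filesystem through the default putpages path and
VOP_WRITE (note the new IO_VMIO handling in fuse_io.c below).  Condensed from
the fuse_vnops.c hunk below:

    static int
    fuse_vnop_getpages(struct vop_getpages_args *ap)
    {
            struct vnode *vp = ap->a_vp;

            if (!fsess_opt_mmap(vnode_mount(vp)))
                    return (VM_PAGER_ERROR);

            return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
                ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
    }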
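
The short-read handling from the first bullet reduces to roughly the following
shape in fuse_io_strategy() (condensed from the fuse_io.c hunk below; error
handling, probes, and the dirty-beyond-EOF case are omitted):

    if (!error && uiop->uio_resid) {
            /* The server returned less than requested: EOF moved. */
            int nread = bp->b_bcount - uiop->uio_resid;

            /* Zero the tail so stale data is never exposed later. */
            bzero((char *)bp->b_data + nread, uiop->uio_resid);
            if ((fvdat->flag & FN_SIZECHANGE) == 0) {
                    /*
                     * No cached size change of our own, so treat this as a
                     * server-side truncation and requery the attributes the
                     * next time they are needed.
                     */
                    uiop->uio_resid = 0;
                    fuse_vnode_clear_attr_cache(vp);
            }
    }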
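
The second bullet is carried by a new buf flag: writes that originate from the
buffer cache (where the originating cred and pid are no longer known) are
tagged B_FUSEFS_WRITE_CACHE, and fuse_io_strategy() passes that down so the
request can carry FUSE_WRITE_CACHE.  A minimal sketch of the plumbing; the
write_flags assignment inside fuse_write_directbackend() is assumed here, not
shown in this diff:

    /* In fuse_io_strategy(), when flushing a dirty buffer: */
    bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE;
    error = fuse_write_directbackend(vp, uiop, cred, fufh, filesize, 0, pages);

    /* Inside fuse_write_directbackend(), roughly (assumed): */
    if (pages)
            fwi->write_flags |= FUSE_WRITE_CACHE;   /* struct fuse_write_in */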

Sponsored by:   The FreeBSD Foundation
Author: Alan Somers
Date:   2019-06-25 17:24:43 +00:00
Parent: 48417ae0ba
Commit: b9e2019755
Notes (svn2git, 2020-12-20 02:59:44 +00:00): svn path=/projects/fuse2/; revision=349378
5 changed files with 181 additions and 242 deletions

File: sys/fs/fuse/fuse_io.c

@@ -100,6 +100,12 @@ __FBSDID("$FreeBSD$");
#include "fuse_ipc.h"
#include "fuse_io.h"
/*
* Set in a struct buf to indicate that the write came from the buffer cache
* and the originating cred and pid are no longer known.
*/
#define B_FUSEFS_WRITE_CACHE B_FS_FLAG1
SDT_PROVIDER_DECLARE(fusefs);
/*
* Fuse trace probe:
@@ -164,10 +170,10 @@ fuse_io_clear_suid_on_write(struct vnode *vp, struct ucred *cred,
SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch, "struct vnode*", "struct uio*",
"int", "struct ucred*", "struct fuse_filehandle*");
SDT_PROBE_DEFINE5(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*",
"struct uio*", "int", "bool", "struct ucred*");
SDT_PROBE_DEFINE4(fusefs, , io, io_dispatch_filehandles_closed, "struct vnode*",
"struct uio*", "int", "struct ucred*");
int
fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
struct ucred *cred, pid_t pid)
{
struct fuse_filehandle *fufh;
@@ -188,8 +194,8 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
closefufh = true;
}
else if (err) {
SDT_PROBE5(fusefs, , io, io_dispatch_filehandles_closed,
vp, uio, ioflag, pages, cred);
SDT_PROBE4(fusefs, , io, io_dispatch_filehandles_closed,
vp, uio, ioflag, cred);
printf("FUSE: io dispatch: filehandles are closed\n");
return err;
}
@@ -236,15 +242,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
start = uio->uio_offset;
end = start + uio->uio_resid;
/*
* Invalidate the write cache unless we're coming from
* VOP_PUTPAGES, in which case we're writing _from_ the
* write cache
*/
if (!pages )
v_inval_buf_range(vp, start, end, iosize);
KASSERT((ioflag & (IO_VMIO | IO_DIRECT)) !=
(IO_VMIO | IO_DIRECT),
("IO_DIRECT used for a cache flush?"));
/* Invalidate the write cache when writing directly */
v_inval_buf_range(vp, start, end, iosize);
err = fuse_write_directbackend(vp, uio, cred, fufh,
filesize, ioflag, pages);
filesize, ioflag, false);
} else {
SDT_PROBE2(fusefs, , io, trace, 1,
"buffered write of vnode");
@@ -352,8 +356,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
*/
n = 0;
if (on < bcount - (intptr_t)bp->b_fsprivate1)
n = MIN((unsigned)(bcount - (intptr_t)bp->b_fsprivate1 - on),
if (on < bcount - bp->b_resid)
n = MIN((unsigned)(bcount - bp->b_resid - on),
uio->uio_resid);
if (n > 0) {
SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
@@ -362,10 +366,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
vfs_bio_brelse(bp, ioflag);
SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
uio->uio_resid, n, bp);
if ((intptr_t)bp->b_fsprivate1 > 0) {
if (bp->b_resid > 0) {
/* Short read indicates EOF */
(void)fuse_vnode_setsize(vp, uio->uio_offset);
bp->b_fsprivate1 = (void*)0;
break;
}
}
@@ -725,6 +727,21 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
brelse(bp);
break;
}
if (bp->b_resid > 0) {
/*
* Short read indicates EOF. Update file size
* from the server and try again.
*/
SDT_PROBE2(fusefs, , io, trace, 1,
"Short read during a RMW");
brelse(bp);
err = fuse_vnode_size(vp, &filesize, cred,
curthread);
if (err)
break;
else
goto again;
}
}
if (bp->b_wcred == NOCRED)
bp->b_wcred = crhold(cred);
@@ -805,8 +822,11 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
vfs_bio_set_flags(bp, ioflag);
bp->b_flags |= B_FUSEFS_WRITE_CACHE;
if (ioflag & IO_SYNC) {
SDT_PROBE2(fusefs, , io, write_biobackend_issue, 2, bp);
if (!(ioflag & IO_VMIO))
bp->b_flags &= ~B_FUSEFS_WRITE_CACHE;
err = bwrite(bp);
} else if (vm_page_count_severe() ||
buf_dirty_count_severe() ||
@@ -864,7 +884,6 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
fflag = bp->b_iocmd == BIO_READ ? FREAD : FWRITE;
cred = bp->b_iocmd == BIO_READ ? bp->b_rcred : bp->b_wcred;
error = fuse_filehandle_getrw(vp, fflag, &fufh, cred, pid);
bp->b_fsprivate1 = (void*)(intptr_t)0;
if (bp->b_iocmd == BIO_READ && error == EBADF) {
/*
* This may be a read-modify-write operation on a cached file
@@ -905,39 +924,40 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize;
error = fuse_read_directbackend(vp, uiop, cred, fufh);
left = uiop->uio_resid;
/*
* Store the amount we failed to read in the buffer's private
* field, so callers can truncate the file if necessary'
*/
bp->b_fsprivate1 = (void*)(intptr_t)left;
if (!error && uiop->uio_resid) {
int nread = bp->b_bcount - uiop->uio_resid;
left = uiop->uio_resid;
bzero((char *)bp->b_data + nread, left);
if (fuse_data_cache_mode != FUSE_CACHE_WB ||
(fvdat->flag & FN_SIZECHANGE) == 0) {
if ((fvdat->flag & FN_SIZECHANGE) == 0) {
/*
* A short read with no error, when not using
* direct io, and when no writes are cached,
* indicates EOF. Update the file size
* accordingly. We must still bzero the
* remaining buffer so uninitialized data
* doesn't get exposed by a future truncate
* that extends the file.
* indicates EOF caused by a server-side
* truncation. Clear the attr cache so we'll
* pick up the new file size and timestamps.
*
* We must still bzero the remaining buffer so
* uninitialized data doesn't get exposed by a
* future truncate that extends the file.
*
* To prevent lock order problems, we must
* truncate the file upstack
* truncate the file upstack, not here.
*/
SDT_PROBE2(fusefs, , io, trace, 1,
"Short read of a clean file");
uiop->uio_resid = 0;
fuse_vnode_clear_attr_cache(vp);
} else {
/*
* If dirty writes _are_ cached beyond EOF,
* that indicates a newly created hole that the
* server doesn't know about.
* server doesn't know about. Those don't pose
* any problem.
* XXX: we don't currently track whether dirty
* writes are cached beyond EOF, before EOF, or
* both.
@@ -976,8 +996,9 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
uiop->uio_rw = UIO_WRITE;
bool pages = bp->b_flags & B_FUSEFS_WRITE_CACHE;
error = fuse_write_directbackend(vp, uiop, cred, fufh,
filesize, 0, false);
filesize, 0, pages);
if (error == EINTR || error == ETIMEDOUT) {
bp->b_flags &= ~(B_INVAL | B_NOCACHE);

File: sys/fs/fuse/fuse_io.h

@@ -60,7 +60,7 @@
#ifndef _FUSE_IO_H_
#define _FUSE_IO_H_
int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, bool pages,
int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
struct ucred *cred, pid_t pid);
int fuse_io_strategy(struct vnode *vp, struct buf *bp);
int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td);

File: sys/fs/fuse/fuse_vnops.c

@@ -150,7 +150,6 @@ static vop_strategy_t fuse_vnop_strategy;
static vop_symlink_t fuse_vnop_symlink;
static vop_write_t fuse_vnop_write;
static vop_getpages_t fuse_vnop_getpages;
static vop_putpages_t fuse_vnop_putpages;
static vop_print_t fuse_vnop_print;
static vop_vptofh_t fuse_vnop_vptofh;
@@ -215,7 +214,6 @@ struct vop_vector fuse_vnops = {
.vop_symlink = fuse_vnop_symlink,
.vop_write = fuse_vnop_write,
.vop_getpages = fuse_vnop_getpages,
.vop_putpages = fuse_vnop_putpages,
.vop_print = fuse_vnop_print,
.vop_vptofh = fuse_vnop_vptofh,
};
@@ -1381,7 +1379,7 @@ fuse_vnop_read(struct vop_read_args *ap)
ioflag |= IO_DIRECT;
}
return fuse_io_dispatch(vp, uio, ioflag, false, cred, pid);
return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
}
/*
@@ -1960,10 +1958,39 @@ fuse_vnop_write(struct vop_write_args *ap)
ioflag |= IO_DIRECT;
}
return fuse_io_dispatch(vp, uio, ioflag, false, cred, pid);
return fuse_io_dispatch(vp, uio, ioflag, cred, pid);
}
static daddr_t
fuse_gbp_getblkno(struct vnode *vp, vm_ooffset_t off)
{
const int biosize = fuse_iosize(vp);
return (off / biosize);
}
static int
fuse_gbp_getblksz(struct vnode *vp, daddr_t lbn)
{
off_t filesize;
int blksz, err;
const int biosize = fuse_iosize(vp);
err = fuse_vnode_size(vp, &filesize, NULL, NULL);
KASSERT(err == 0, ("vfs_bio_getpages can't handle errors here"));
if (err)
return biosize;
if ((off_t)lbn * biosize >= filesize) {
blksz = 0;
} else if ((off_t)(lbn + 1) * biosize > filesize) {
blksz = filesize - (off_t)lbn * biosize;
} else {
blksz = biosize;
}
return (blksz);
}
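
For illustration, assuming fuse_iosize() returns 65536 and the cached file
size is 100000 bytes, the new block-size callback yields:

    fuse_gbp_getblksz(vp, 0) == 65536                   /* wholly before EOF */
    fuse_gbp_getblksz(vp, 1) == 100000 - 65536 == 34464 /* partial last block */
    fuse_gbp_getblksz(vp, 2) == 0                       /* wholly past EOF */
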
SDT_PROBE_DEFINE1(fusefs, , vnops, vnop_getpages_error, "int");
/*
struct vnop_getpages_args {
struct vnode *a_vp;
@@ -1975,23 +2002,7 @@ SDT_PROBE_DEFINE1(fusefs, , vnops, vnop_getpages_error, "int");
static int
fuse_vnop_getpages(struct vop_getpages_args *ap)
{
int i, error, nextoff, size, toff, count, npages;
struct uio uio;
struct iovec iov;
vm_offset_t kva;
struct buf *bp;
struct vnode *vp;
struct thread *td;
struct ucred *cred;
vm_page_t *pages;
pid_t pid = curthread->td_proc->p_pid;
vp = ap->a_vp;
KASSERT(vp->v_object, ("objectless vp passed to getpages"));
td = curthread; /* XXX */
cred = curthread->td_ucred; /* XXX */
pages = ap->a_m;
npages = ap->a_count;
struct vnode *vp = ap->a_vp;
if (!fsess_opt_mmap(vnode_mount(vp))) {
SDT_PROBE2(fusefs, , vnops, trace, 1,
@@ -1999,190 +2010,8 @@ fuse_vnop_getpages(struct vop_getpages_args *ap)
return (VM_PAGER_ERROR);
}
/*
* If the last page is partially valid, just return it and allow
* the pager to zero-out the blanks. Partially valid pages can
* only occur at the file EOF.
*
* XXXGL: is that true for FUSE, which is a local filesystem,
* but still somewhat disconnected from the kernel?
*/
VM_OBJECT_WLOCK(vp->v_object);
if (pages[npages - 1]->valid != 0 && --npages == 0)
goto out;
VM_OBJECT_WUNLOCK(vp->v_object);
/*
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
kva = (vm_offset_t)bp->b_data;
pmap_qenter(kva, pages, npages);
VM_CNT_INC(v_vnodein);
VM_CNT_ADD(v_vnodepgsin, npages);
count = npages << PAGE_SHIFT;
iov.iov_base = (caddr_t)kva;
iov.iov_len = count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = IDX_TO_OFF(pages[0]->pindex);
uio.uio_resid = count;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_rw = UIO_READ;
uio.uio_td = td;
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, true, cred, pid);
pmap_qremove(kva, npages);
uma_zfree(fuse_pbuf_zone, bp);
if (error && (uio.uio_resid == count)) {
SDT_PROBE1(fusefs, , vnops, vnop_getpages_error, error);
return VM_PAGER_ERROR;
}
/*
* Calculate the number of bytes read and validate only that number
* of bytes. Note that due to pending writes, size may be 0. This
* does not mean that the remaining data is invalid!
*/
size = count - uio.uio_resid;
VM_OBJECT_WLOCK(vp->v_object);
fuse_vm_page_lock_queues();
for (i = 0, toff = 0; i < npages; i++, toff = nextoff) {
vm_page_t m;
nextoff = toff + PAGE_SIZE;
m = pages[i];
if (nextoff <= size) {
/*
* Read operation filled an entire page
*/
m->valid = VM_PAGE_BITS_ALL;
KASSERT(m->dirty == 0,
("fuse_getpages: page %p is dirty", m));
} else if (size > toff) {
/*
* Read operation filled a partial page.
*/
m->valid = 0;
vm_page_set_valid_range(m, 0, size - toff);
KASSERT(m->dirty == 0,
("fuse_getpages: page %p is dirty", m));
} else {
/*
* Read operation was short. If no error occurred
* we may have hit a zero-fill section. We simply
* leave valid set to 0.
*/
;
}
}
fuse_vm_page_unlock_queues();
out:
VM_OBJECT_WUNLOCK(vp->v_object);
if (ap->a_rbehind)
*ap->a_rbehind = 0;
if (ap->a_rahead)
*ap->a_rahead = 0;
return (VM_PAGER_OK);
}
/*
struct vnop_putpages_args {
struct vnode *a_vp;
vm_page_t *a_m;
int a_count;
int a_sync;
int *a_rtvals;
vm_ooffset_t a_offset;
};
*/
static int
fuse_vnop_putpages(struct vop_putpages_args *ap)
{
struct uio uio;
struct iovec iov;
vm_offset_t kva;
struct buf *bp;
int i, error, npages, count;
off_t offset;
int *rtvals;
struct vnode *vp;
struct thread *td;
struct ucred *cred;
vm_page_t *pages;
vm_ooffset_t fsize;
pid_t pid = curthread->td_proc->p_pid;
vp = ap->a_vp;
KASSERT(vp->v_object, ("objectless vp passed to putpages"));
fsize = vp->v_object->un_pager.vnp.vnp_size;
td = curthread; /* XXX */
cred = curthread->td_ucred; /* XXX */
pages = ap->a_m;
count = ap->a_count;
rtvals = ap->a_rtvals;
npages = btoc(count);
offset = IDX_TO_OFF(pages[0]->pindex);
if (!fsess_opt_mmap(vnode_mount(vp))) {
SDT_PROBE2(fusefs, , vnops, trace, 1,
"called on non-cacheable vnode??\n");
}
for (i = 0; i < npages; i++)
rtvals[i] = VM_PAGER_AGAIN;
/*
* When putting pages, do not extend file past EOF.
*/
if (offset + count > fsize) {
count = fsize - offset;
if (count < 0)
count = 0;
}
/*
* We use only the kva address for the buffer, but this is extremely
* convenient and fast.
*/
bp = uma_zalloc(fuse_pbuf_zone, M_WAITOK);
kva = (vm_offset_t)bp->b_data;
pmap_qenter(kva, pages, npages);
VM_CNT_INC(v_vnodeout);
VM_CNT_ADD(v_vnodepgsout, count);
iov.iov_base = (caddr_t)kva;
iov.iov_len = count;
uio.uio_iov = &iov;
uio.uio_iovcnt = 1;
uio.uio_offset = offset;
uio.uio_resid = count;
uio.uio_segflg = UIO_SYSSPACE;
uio.uio_rw = UIO_WRITE;
uio.uio_td = td;
error = fuse_io_dispatch(vp, &uio, IO_DIRECT, true, cred, pid);
pmap_qremove(kva, npages);
uma_zfree(fuse_pbuf_zone, bp);
if (!error) {
int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE;
for (i = 0; i < nwritten; i++) {
rtvals[i] = VM_PAGER_OK;
VM_OBJECT_WLOCK(pages[i]->object);
vm_page_undirty(pages[i]);
VM_OBJECT_WUNLOCK(pages[i]->object);
}
}
return rtvals[0];
return (vfs_bio_getpages(vp, ap->a_m, ap->a_count, ap->a_rbehind,
ap->a_rahead, fuse_gbp_getblkno, fuse_gbp_getblksz));
}
static const char extattr_namespace_separator = '.';

File: tests/sys/fs/fusefs/read.cc

@@ -413,7 +413,8 @@ TEST_F(Read, eio)
/*
* If the server returns a short read when direct io is not in use, that
* indicates EOF and we should update the file size.
* indicates EOF, because of a server-side truncation. We should invalidate
* all cached attributes. We may update the file size.
*/
TEST_F(ReadCacheable, eof)
{
@@ -425,18 +426,21 @@ TEST_F(ReadCacheable, eof)
uint64_t offset = 100;
ssize_t bufsize = strlen(CONTENTS);
ssize_t partbufsize = 3 * bufsize / 4;
ssize_t r;
char buf[bufsize];
struct stat sb;
expect_lookup(RELPATH, ino, offset + bufsize);
expect_open(ino, 0, 1);
expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
expect_getattr(ino, offset + partbufsize);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
ASSERT_EQ(partbufsize, pread(fd, buf, bufsize, offset))
<< strerror(errno);
r = pread(fd, buf, bufsize, offset);
ASSERT_LE(0, r) << strerror(errno);
EXPECT_EQ(partbufsize, r) << strerror(errno);
ASSERT_EQ(0, fstat(fd, &sb));
EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
/* Deliberately leak fd. close(2) will be tested in release.cc */
@@ -459,6 +463,7 @@ TEST_F(ReadCacheable, eof_of_whole_buffer)
expect_open(ino, 0, 1);
expect_read(ino, 2 * m_maxbcachebuf, bufsize, bufsize, CONTENTS);
expect_read(ino, m_maxbcachebuf, m_maxbcachebuf, 0, CONTENTS);
expect_getattr(ino, m_maxbcachebuf);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
@@ -586,6 +591,57 @@ TEST_F(ReadCacheable, mmap)
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* A read via mmap comes up short, indicating that the file was truncated
* server-side.
*/
TEST_F(ReadCacheable, mmap_eof)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefgh";
uint64_t ino = 42;
int fd;
ssize_t len;
size_t bufsize = strlen(CONTENTS);
struct stat sb;
void *p;
len = getpagesize();
expect_lookup(RELPATH, ino, 100000);
expect_open(ino, 0, 1);
/* mmap may legitimately try to read more data than is available */
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
return (in.header.opcode == FUSE_READ &&
in.header.nodeid == ino &&
in.body.read.fh == Read::FH &&
in.body.read.offset == 0 &&
in.body.read.size >= bufsize);
}, Eq(true)),
_)
).WillOnce(Invoke(ReturnImmediate([=](auto in __unused, auto& out) {
out.header.len = sizeof(struct fuse_out_header) + bufsize;
memmove(out.body.bytes, CONTENTS, bufsize);
})));
expect_getattr(ino, bufsize);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
p = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 0);
ASSERT_NE(MAP_FAILED, p) << strerror(errno);
/* The file size should be automatically truncated */
ASSERT_EQ(0, memcmp(p, CONTENTS, bufsize));
ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
EXPECT_EQ((off_t)bufsize, sb.st_size);
ASSERT_EQ(0, munmap(p, len)) << strerror(errno);
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* Just as when FOPEN_DIRECT_IO is used, reads with O_DIRECT should bypass
* cache and go straight to the daemon

File: tests/sys/fs/fusefs/write.cc

@@ -528,6 +528,39 @@ TEST_F(Write, rlimit_fsize)
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* A short read indicates EOF. Test that nothing bad happens if we get EOF
* during the R of a RMW operation.
*/
TEST_F(WriteCacheable, eof_during_rmw)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefgh";
const char *INITIAL = "XXXXXXXXXX";
uint64_t ino = 42;
uint64_t offset = 1;
ssize_t bufsize = strlen(CONTENTS);
off_t orig_fsize = 10;
off_t truncated_fsize = 5;
off_t final_fsize = bufsize;
int fd;
FuseTest::expect_lookup(RELPATH, ino, S_IFREG | 0644, orig_fsize, 1);
expect_open(ino, 0, 1);
expect_read(ino, 0, orig_fsize, truncated_fsize, INITIAL, O_RDWR);
expect_getattr(ino, truncated_fsize);
expect_read(ino, 0, final_fsize, final_fsize, INITIAL, O_RDWR);
maybe_expect_write(ino, offset, bufsize, CONTENTS);
fd = open(FULLPATH, O_RDWR);
EXPECT_LE(0, fd) << strerror(errno);
ASSERT_EQ(bufsize, pwrite(fd, CONTENTS, bufsize, offset))
<< strerror(errno);
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* If the kernel cannot be sure which uid, gid, or pid was responsible for a
* write, then it must set the FUSE_WRITE_CACHE bit