fusefs: correctly handle short reads

A fuse server may return a short read for three reasons:

* The file is opened with FOPEN_DIRECT_IO.  In this case, the short read
  should be returned directly to userland.  We already handled this case
  correctly.

* The file was truncated server-side, and the read hit EOF.  In this case,
  the kernel should update the file size.  Fixed in the case of VOP_READ.
  Fixing this for VOP_GETPAGES is TODO.

* The file is opened in writeback mode, there are dirty buffers past what
  the server thinks is the file's EOF, and the read hit what the server
  thinks is the file's EOF.  In this case, the client is trying to read a
  hole, and should zero-fill it.  We already handled this case, and I added
  a test for it.

Sponsored by:	The FreeBSD Foundation
This commit is contained in:
Alan Somers 2019-06-21 21:44:31 +00:00
parent 87ff949a7b
commit aef22f2d75
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/fuse2/; revision=349279
3 changed files with 153 additions and 14 deletions

View File

@ -348,8 +348,9 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
*/
n = 0;
if (on < bcount)
n = MIN((unsigned)(bcount - on), uio->uio_resid);
if (on < bcount - bp->b_resid)
n = MIN((unsigned)(bcount - bp->b_resid - on),
uio->uio_resid);
if (n > 0) {
SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
err = uiomove(bp->b_data + on, n, uio);
@ -357,6 +358,11 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
vfs_bio_brelse(bp, ioflag);
SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
uio->uio_resid, n, bp);
if (bp->b_resid > 0) {
/* Short read indicates EOF */
(void)fuse_vnode_setsize(vp, uio->uio_offset);
break;
}
}
return (err);
@ -415,8 +421,13 @@ fuse_read_directbackend(struct vnode *vp, struct uio *uio,
if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio)))
break;
if (fdi.iosize < fri->size)
if (fdi.iosize < fri->size) {
/*
* Short read. Should only happen at EOF or with
* direct io.
*/
break;
}
}
out:
@ -828,6 +839,7 @@ fuse_write_biobackend(struct vnode *vp, struct uio *uio,
int
fuse_io_strategy(struct vnode *vp, struct buf *bp)
{
struct fuse_vnode_data *fvdat = VTOFUD(vp);
struct fuse_filehandle *fufh;
struct ucred *cred;
struct uio *uiop;
@ -888,19 +900,35 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
if (!error && uiop->uio_resid) {
/*
* If we had a short read with no error, we must have
* hit a file hole. We should zero-fill the remainder.
* This can also occur if the server hits the file EOF.
*
* Holes used to be able to occur due to pending
* writes, but that is not possible any longer.
* A short read with no error, when not using direct io,
* and when no writes are cached, indicates EOF.
* Update the file size accordingly.
*/
int nread = bp->b_bcount - uiop->uio_resid;
int left = uiop->uio_resid;
if (left > 0)
if (fuse_data_cache_mode != FUSE_CACHE_WB ||
(fvdat->flag & FN_SIZECHANGE) == 0) {
SDT_PROBE2(fusefs, , io, trace, 1,
"Short read of a clean file");
/*
* XXX To prevent lock order problems, we must
* truncate the file upstack
*/
} else {
/*
* If dirty writes _are_ cached beyond EOF,
* that indicates a newly created hole that the
* server doesn't know about. Fill it in.
* XXX: we don't currently track whether dirty
* writes are cached beyond EOF, before EOF, or
* both.
*/
SDT_PROBE2(fusefs, , io, trace, 1,
"Short read of a dirty file");
int nread = bp->b_bcount - uiop->uio_resid;
int left = uiop->uio_resid;
bzero((char *)bp->b_data + nread, left);
uiop->uio_resid = 0;
uiop->uio_resid = 0;
}
}
if (error) {
bp->b_ioflags |= BIO_ERROR;

View File

@ -411,6 +411,69 @@ TEST_F(Read, eio)
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* If the server returns a short read when direct io is not in use, that
* indicates EOF and we should update the file size.
*/
TEST_F(ReadCacheable, eof)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefghijklmnop";
uint64_t ino = 42;
int fd;
uint64_t offset = 100;
ssize_t bufsize = strlen(CONTENTS);
ssize_t partbufsize = 3 * bufsize / 4;
char buf[bufsize];
struct stat sb;
expect_lookup(RELPATH, ino, offset + bufsize);
expect_open(ino, 0, 1);
expect_read(ino, 0, offset + bufsize, offset + partbufsize, CONTENTS);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
ASSERT_EQ(partbufsize, pread(fd, buf, bufsize, offset))
<< strerror(errno);
ASSERT_EQ(0, fstat(fd, &sb));
EXPECT_EQ((off_t)(offset + partbufsize), sb.st_size);
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/* Like ReadCacheable.eof, but causes an entire buffer to be invalidated */
TEST_F(ReadCacheable, eof_of_whole_buffer)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS = "abcdefghijklmnop";
uint64_t ino = 42;
int fd;
ssize_t bufsize = strlen(CONTENTS);
off_t old_filesize = m_maxbcachebuf * 2 + bufsize;
char buf[bufsize];
struct stat sb;
expect_lookup(RELPATH, ino, old_filesize);
expect_open(ino, 0, 1);
expect_read(ino, 2 * m_maxbcachebuf, bufsize, bufsize, CONTENTS);
expect_read(ino, m_maxbcachebuf, m_maxbcachebuf, 0, CONTENTS);
fd = open(FULLPATH, O_RDONLY);
ASSERT_LE(0, fd) << strerror(errno);
/* Cache the third block */
ASSERT_EQ(bufsize, pread(fd, buf, bufsize, m_maxbcachebuf * 2))
<< strerror(errno);
/* Try to read the 2nd block, but it's past EOF */
ASSERT_EQ(0, pread(fd, buf, bufsize, m_maxbcachebuf))
<< strerror(errno);
ASSERT_EQ(0, fstat(fd, &sb));
EXPECT_EQ((off_t)(m_maxbcachebuf), sb.st_size);
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* With the keep_cache option, the kernel may keep its read cache across
* multiple open(2)s.

View File

@ -930,6 +930,54 @@ TEST_F(WriteBackAsync, delay)
/* Don't close the file because that would flush the cache */
}
/*
* In WriteBack mode, writes may be cached beyond what the server thinks is the
* EOF. In this case, a short read at EOF should _not_ cause fusefs to update
* the file's size.
*/
TEST_F(WriteBackAsync, eof)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char *CONTENTS0 = "abcdefgh";
const char *CONTENTS1 = "ijklmnop";
uint64_t ino = 42;
int fd;
off_t offset = m_maxbcachebuf;
ssize_t wbufsize = strlen(CONTENTS1);
off_t old_filesize = (off_t)strlen(CONTENTS0);
ssize_t rbufsize = 2 * old_filesize;
char readbuf[rbufsize];
size_t holesize = rbufsize - old_filesize;
char hole[holesize];
struct stat sb;
ssize_t r;
expect_lookup(RELPATH, ino, 0);
expect_open(ino, 0, 1);
expect_read(ino, 0, m_maxbcachebuf, old_filesize, CONTENTS0);
fd = open(FULLPATH, O_RDWR);
EXPECT_LE(0, fd) << strerror(errno);
/* Write and cache data beyond EOF */
ASSERT_EQ(wbufsize, pwrite(fd, CONTENTS1, wbufsize, offset))
<< strerror(errno);
/* Read from the old EOF */
r = pread(fd, readbuf, rbufsize, 0);
ASSERT_LE(0, r) << strerror(errno);
EXPECT_EQ(rbufsize, r) << "read should've synthesized a hole";
EXPECT_EQ(0, memcmp(CONTENTS0, readbuf, old_filesize));
bzero(hole, holesize);
EXPECT_EQ(0, memcmp(hole, readbuf + old_filesize, holesize));
/* The file's size should still be what was established by pwrite */
ASSERT_EQ(0, fstat(fd, &sb)) << strerror(errno);
EXPECT_EQ(offset + wbufsize, sb.st_size);
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
/*
* Without direct_io, writes should be committed to cache
*/