fusefs: improve the short read fix from r349279

VOP_GETPAGES intentionally tries to read beyond EOF, so fuse_read_biobackend
can't rely on bp->b_resid > 0 indicating a short read.  And adjusting
bp->b_bcount after a short read seems to cause some sort of resource leak.
Instead, store the shortfall in the bp->b_fsprivate1 field.

Sponsored by:	The FreeBSD Foundation
Author:	Alan Somers
Date:	2019-06-24 17:05:31 +00:00
Parent:	44f654fdc5
Commit:	17575bad85
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/projects/fuse2/; revision=349332

2 changed files with 122 additions and 21 deletions
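
The whole fix turns on one C idiom: a pointer-sized field can carry a small signed byte count if it is round-tripped through intptr_t. The standalone sketch below shows just that idiom under illustrative assumptions; struct fakebuf and the 512-byte value are stand-ins, not the kernel's struct buf or its real bookkeeping.

#include <sys/types.h>
#include <stdint.h>
#include <stdio.h>

/* Stand-in for struct buf; only the private field matters here. */
struct fakebuf {
	void	*b_fsprivate1;
};

int
main(void)
{
	struct fakebuf bp = { .b_fsprivate1 = (void *)0 };
	ssize_t shortfall = 512;	/* bytes the backend failed to return */

	/* Store: integer -> intptr_t -> void *. */
	bp.b_fsprivate1 = (void *)(intptr_t)shortfall;

	/* Load: void * -> intptr_t -> integer, then clear the field. */
	if ((intptr_t)bp.b_fsprivate1 > 0) {
		printf("short read of %jd bytes\n",
		    (intmax_t)(intptr_t)bp.b_fsprivate1);
		bp.b_fsprivate1 = (void *)0;
	}
	return (0);
}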

File 1 of 2

@@ -348,8 +348,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
 		 */
 		n = 0;
-		if (on < bcount - bp->b_resid)
-			n = MIN((unsigned)(bcount - bp->b_resid - on),
+		if (on < bcount - (intptr_t)bp->b_fsprivate1)
+			n = MIN((unsigned)(bcount - (intptr_t)bp->b_fsprivate1 - on),
 			    uio->uio_resid);
 		if (n > 0) {
 			SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
@@ -358,9 +358,10 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
 		vfs_bio_brelse(bp, ioflag);
 		SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
 		    uio->uio_resid, n, bp);
-		if (bp->b_resid > 0) {
+		if ((intptr_t)bp->b_fsprivate1 > 0) {
 			/* Short read indicates EOF */
 			(void)fuse_vnode_setsize(vp, uio->uio_offset);
+			bp->b_fsprivate1 = (void*)0;
 			break;
 		}
 	}
@@ -891,16 +892,23 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
 	KASSERT(!(bp->b_flags & B_DONE),
 	    ("fuse_io_strategy: bp %p already marked done", bp));
 	if (bp->b_iocmd == BIO_READ) {
+		ssize_t left;
+
 		io.iov_len = uiop->uio_resid = bp->b_bcount;
 		io.iov_base = bp->b_data;
 		uiop->uio_rw = UIO_READ;
 		uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize;
 		error = fuse_read_directbackend(vp, uiop, cred, fufh);
+		left = uiop->uio_resid;
+
+		/*
+		 * Store the amount we failed to read in the buffer's private
+		 * field, so callers can truncate the file if necessary.
+		 */
+		bp->b_fsprivate1 = (void*)(intptr_t)left;
 
 		if (!error && uiop->uio_resid) {
 			int nread = bp->b_bcount - uiop->uio_resid;
-			int left = uiop->uio_resid;
 			bzero((char *)bp->b_data + nread, left);
 
 			if (fuse_data_cache_mode != FUSE_CACHE_WB ||
@@ -914,11 +922,12 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
 				 * doesn't get exposed by a future truncate
 				 * that extends the file.
 				 *
-				 * XXX To prevent lock order problems, we must
+				 * To prevent lock order problems, we must
 				 * truncate the file upstack
 				 */
 				SDT_PROBE2(fusefs, , io, trace, 1,
 				    "Short read of a clean file");
+				uiop->uio_resid = 0;
 			} else {
 				/*
 				 * If dirty writes _are_ cached beyond EOF,
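
To make the arithmetic in the first hunk concrete, here is a small userland sketch of the same window calculation. valid_bytes() and its sample numbers are hypothetical; they only mirror the roles of bcount, on, the stored shortfall, and uio_resid.

#include <stddef.h>
#include <stdio.h>

#define MIN(a, b)	((a) < (b) ? (a) : (b))

/*
 * How many bytes of a cached block may be fed to the caller when the
 * backend came up 'shortfall' bytes short: nothing at or beyond the
 * valid-data boundary (bcount - shortfall), and never more than the
 * caller still wants (resid).
 */
static size_t
valid_bytes(size_t bcount, size_t on, size_t shortfall, size_t resid)
{
	size_t n = 0;

	if (on < bcount - shortfall)
		n = MIN(bcount - shortfall - on, resid);
	return (n);
}

int
main(void)
{
	/* Request starts inside the valid data: feed 16384 bytes. */
	printf("%zu\n", valid_bytes(65536, 0, 8192, 16384));
	/* Request starts past the valid data: feed nothing (EOF). */
	printf("%zu\n", valid_bytes(65536, 61440, 8192, 16384));
	return (0);
}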

File 2 of 2

@@ -29,6 +29,8 @@
  */
 
 extern "C" {
+#include <sys/mman.h>
+
 #include <fcntl.h>
 #include <stdlib.h>
 #include <unistd.h>
@@ -76,11 +78,13 @@ class Io: public FuseTest,
 	public WithParamInterface<tuple<uint32_t, uint32_t, bool>> {
 public:
 int m_backing_fd, m_control_fd, m_test_fd;
+off_t m_filesize;
 Io(): m_backing_fd(-1), m_control_fd(-1) {};
 void SetUp()
 {
+	m_filesize = 0;
 	m_backing_fd = open("backing_file", O_RDWR | O_CREAT | O_TRUNC, 0644);
 	if (m_backing_fd < 0)
 		FAIL() << strerror(errno);
@@ -126,10 +130,11 @@ void SetUp()
 		ssize_t isize = in.body.write.size;
 		off_t iofs = in.body.write.offset;
 		void *buf = out.body.bytes;
+		ssize_t osize;
 
-		ASSERT_LE(0, pread(m_backing_fd, buf, isize, iofs))
-		    << strerror(errno);
-		out.header.len = sizeof(struct fuse_out_header) + isize;
+		osize = pread(m_backing_fd, buf, isize, iofs);
+		ASSERT_LE(0, osize) << strerror(errno);
+		out.header.len = sizeof(struct fuse_out_header) + osize;
 	})));
 	EXPECT_CALL(*m_mock, process(
 		ResultOf([=](auto in) {
@@ -182,21 +187,52 @@ void do_ftruncate(off_t offs)
 {
 	ASSERT_EQ(0, ftruncate(m_test_fd, offs)) << strerror(errno);
 	ASSERT_EQ(0, ftruncate(m_control_fd, offs)) << strerror(errno);
+	m_filesize = offs;
+}
+
+void do_mapread(ssize_t size, off_t offs)
+{
+	void *control_buf, *p;
+	off_t pg_offset, page_mask;
+	size_t map_size;
+
+	page_mask = getpagesize() - 1;
+	pg_offset = offs & page_mask;
+	map_size = pg_offset + size;
+
+	p = mmap(NULL, map_size, PROT_READ, MAP_FILE | MAP_SHARED, m_test_fd,
+	    offs - pg_offset);
+	ASSERT_NE(p, MAP_FAILED) << strerror(errno);
+
+	control_buf = malloc(size);
+	ASSERT_NE(NULL, control_buf) << strerror(errno);
+
+	ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs))
+	    << strerror(errno);
+
+	compare((void*)((char*)p + pg_offset), control_buf, offs, size);
+
+	ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno);
+	free(control_buf);
 }
 
 void do_read(ssize_t size, off_t offs)
 {
 	void *test_buf, *control_buf;
+	ssize_t r;
 
 	test_buf = malloc(size);
 	ASSERT_NE(NULL, test_buf) << strerror(errno);
 	control_buf = malloc(size);
 	ASSERT_NE(NULL, control_buf) << strerror(errno);
 
-	ASSERT_EQ(size, pread(m_test_fd, test_buf, size, offs))
-	    << strerror(errno);
-	ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs))
-	    << strerror(errno);
+	errno = 0;
+	r = pread(m_test_fd, test_buf, size, offs);
+	ASSERT_NE(-1, r) << strerror(errno);
+	ASSERT_EQ(size, r) << "unexpected short read";
+
+	r = pread(m_control_fd, control_buf, size, offs);
+	ASSERT_NE(-1, r) << strerror(errno);
+	ASSERT_EQ(size, r) << "unexpected short read";
 
 	compare(test_buf, control_buf, offs, size);
@@ -204,6 +240,43 @@ void do_read(ssize_t size, off_t offs)
 	free(test_buf);
 }
 
+void do_mapwrite(ssize_t size, off_t offs)
+{
+	char *buf;
+	void *p;
+	off_t pg_offset, page_mask;
+	size_t map_size;
+	long i;
+
+	page_mask = getpagesize() - 1;
+	pg_offset = offs & page_mask;
+	map_size = pg_offset + size;
+
+	buf = (char*)malloc(size);
+	ASSERT_NE(NULL, buf) << strerror(errno);
+	for (i=0; i < size; i++)
+		buf[i] = random();
+
+	if (offs + size > m_filesize) {
+		/*
+		 * Must manually extend.  vm_mmap_vnode will not implicitly
+		 * extend a vnode
+		 */
+		do_ftruncate(offs + size);
+	}
+
+	p = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
+	    MAP_FILE | MAP_SHARED, m_test_fd, offs - pg_offset);
+	ASSERT_NE(p, MAP_FAILED) << strerror(errno);
+
+	bcopy(buf, (char*)p + pg_offset, size);
+	ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs))
+	    << strerror(errno);
+
+	free(buf);
+	ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno);
+}
+
 void do_write(ssize_t size, off_t offs)
 {
 	char *buf;
@@ -218,6 +291,9 @@ void do_write(ssize_t size, off_t offs)
 	    << strerror(errno);
 	ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs))
 	    << strerror(errno);
+	m_filesize = std::max(m_filesize, offs + size);
+
+	free(buf);
 }
 
 };
@@ -240,6 +316,18 @@ TEST_P(Io, extend_from_dirty_page)
 	do_read(rsize, rofs);
 }
 
+/*
+ * mapwrite into a newly extended part of a file.
+ *
+ * fsx -c 100 -i 100 -l 524288 -o 131072 -N5 -P /tmp -S19 fsx.bin
+ */
+TEST_P(Io, extend_by_mapwrite)
+{
+	do_mapwrite(0x849e, 0x29a3a);	/* [0x29a3a, 0x31ed7] */
+	do_mapwrite(0x3994, 0x3c7d8);	/* [0x3c7d8, 0x4016b] */
+	do_read(0xf556, 0x30c16);	/* [0x30c16, 0x4016b] */
+}
+
 /*
  * When writing the last page of a file, it must be written synchronously.
  * Otherwise the cached page can become invalid by a subsequent extend
@@ -249,16 +337,20 @@ TEST_P(Io, extend_from_dirty_page)
  */
 TEST_P(Io, last_page)
 {
-	off_t wofs0 = 0x1134f;
-	ssize_t wsize0 = 0xcc77;
-	off_t wofs1 = 0x2096a;
-	ssize_t wsize1 = 0xdfa7;
-	off_t rofs = 0x1a3aa;
-	ssize_t rsize = 0xb5b7;
-
-	do_write(wsize0, wofs0);
-	do_write(wsize1, wofs1);
-	do_read(rsize, rofs);
+	do_write(0xcc77, 0x1134f);	/* [0x1134f, 0x1dfc5] */
+	do_write(0xdfa7, 0x2096a);	/* [0x2096a, 0x2e910] */
+	do_read(0xb5b7, 0x1a3aa);	/* [0x1a3aa, 0x25960] */
+}
+
+/*
+ * Read a hole using mmap
+ *
+ * fsx -c 100 -i 100 -l 524288 -o 131072 -N11 -P /tmp -S14 fsx.bin
+ */
+TEST_P(Io, mapread_hole)
+{
+	do_write(0x123b7, 0xf205);	/* [0xf205, 0x215bb] */
+	do_mapread(0xeeea, 0x2f4c);	/* [0x2f4c, 0x11e35] */
 }
 
 /*
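
Both new helpers, do_mapread() and do_mapwrite(), round the requested offset down to a page boundary before calling mmap(), because mmap() requires a page-aligned file offset. Below is a standalone sketch of that alignment arithmetic; the file name "some_file" and the offsets are hypothetical and only echo the helpers above.

#include <sys/mman.h>
#include <sys/types.h>
#include <err.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	off_t offs = 0x2f4c;			/* unaligned file offset */
	size_t size = 0x100;			/* bytes we care about */
	off_t page_mask = getpagesize() - 1;
	off_t pg_offset = offs & page_mask;	/* distance into its page */
	size_t map_size = pg_offset + size;	/* cover that lead-in too */
	char *p;
	int fd;

	fd = open("some_file", O_RDONLY);	/* hypothetical test file */
	if (fd < 0)
		err(1, "open");

	/* Map from the page boundary at or below offs ... */
	p = mmap(NULL, map_size, PROT_READ, MAP_FILE | MAP_SHARED, fd,
	    offs - pg_offset);
	if (p == MAP_FAILED)
		err(1, "mmap");

	/* ... so the byte at file offset offs lives at p + pg_offset. */
	printf("byte at offset %#jx: %#x\n", (intmax_t)offs,
	    (unsigned char)p[pg_offset]);

	munmap(p, map_size);
	close(fd);
	return (0);
}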