fusefs: improve the short read fix from r349279

VOP_GETPAGES intentionally tries to read beyond EOF, so fuse_read_biobackend
can't rely on bp->b_resid > 0 indicating a short read.  And adjusting
bp->b_bcount after a short read seems to cause some sort of resource leak.
Instead, store the shortfall in the bp->b_fsprivate1 field.

Sponsored by:	The FreeBSD Foundation
This commit is contained in:
Alan Somers 2019-06-24 17:05:31 +00:00
parent 44f654fdc5
commit 17575bad85
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/fuse2/; revision=349332
2 changed files with 122 additions and 21 deletions

View File

@ -348,8 +348,8 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
*/
n = 0;
if (on < bcount - bp->b_resid)
n = MIN((unsigned)(bcount - bp->b_resid - on),
if (on < bcount - (intptr_t)bp->b_fsprivate1)
n = MIN((unsigned)(bcount - (intptr_t)bp->b_fsprivate1 - on),
uio->uio_resid);
if (n > 0) {
SDT_PROBE2(fusefs, , io, read_bio_backend_feed, n, bp);
@ -358,9 +358,10 @@ fuse_read_biobackend(struct vnode *vp, struct uio *uio, int ioflag,
vfs_bio_brelse(bp, ioflag);
SDT_PROBE4(fusefs, , io, read_bio_backend_end, err,
uio->uio_resid, n, bp);
if (bp->b_resid > 0) {
if ((intptr_t)bp->b_fsprivate1 > 0) {
/* Short read indicates EOF */
(void)fuse_vnode_setsize(vp, uio->uio_offset);
bp->b_fsprivate1 = (void*)0;
break;
}
}
@ -891,16 +892,23 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
KASSERT(!(bp->b_flags & B_DONE),
("fuse_io_strategy: bp %p already marked done", bp));
if (bp->b_iocmd == BIO_READ) {
ssize_t left;
io.iov_len = uiop->uio_resid = bp->b_bcount;
io.iov_base = bp->b_data;
uiop->uio_rw = UIO_READ;
uiop->uio_offset = ((off_t)bp->b_lblkno) * biosize;
error = fuse_read_directbackend(vp, uiop, cred, fufh);
left = uiop->uio_resid;
/*
* Store the amount we failed to read in the buffer's private
* field, so callers can truncate the file if necessary.
*/
bp->b_fsprivate1 = (void*)(intptr_t)left;
if (!error && uiop->uio_resid) {
int nread = bp->b_bcount - uiop->uio_resid;
int left = uiop->uio_resid;
bzero((char *)bp->b_data + nread, left);
if (fuse_data_cache_mode != FUSE_CACHE_WB ||
@ -914,11 +922,12 @@ fuse_io_strategy(struct vnode *vp, struct buf *bp)
* doesn't get exposed by a future truncate
* that extends the file.
*
* XXX To prevent lock order problems, we must
* To prevent lock order problems, we must
* truncate the file upstack
*/
SDT_PROBE2(fusefs, , io, trace, 1,
"Short read of a clean file");
uiop->uio_resid = 0;
} else {
/*
* If dirty writes _are_ cached beyond EOF,

View File

@ -29,6 +29,8 @@
*/
extern "C" {
#include <sys/mman.h>
#include <fcntl.h>
#include <stdlib.h>
#include <unistd.h>
@ -76,11 +78,13 @@ class Io: public FuseTest,
public WithParamInterface<tuple<uint32_t, uint32_t, bool>> {
public:
int m_backing_fd, m_control_fd, m_test_fd;
off_t m_filesize;
Io(): m_backing_fd(-1), m_control_fd(-1) {};
void SetUp()
{
m_filesize = 0;
m_backing_fd = open("backing_file", O_RDWR | O_CREAT | O_TRUNC, 0644);
if (m_backing_fd < 0)
FAIL() << strerror(errno);
@ -126,10 +130,11 @@ void SetUp()
ssize_t isize = in.body.write.size;
off_t iofs = in.body.write.offset;
void *buf = out.body.bytes;
ssize_t osize;
ASSERT_LE(0, pread(m_backing_fd, buf, isize, iofs))
<< strerror(errno);
out.header.len = sizeof(struct fuse_out_header) + isize;
osize = pread(m_backing_fd, buf, isize, iofs);
ASSERT_LE(0, osize) << strerror(errno);
out.header.len = sizeof(struct fuse_out_header) + osize;
})));
EXPECT_CALL(*m_mock, process(
ResultOf([=](auto in) {
@ -182,21 +187,52 @@ void do_ftruncate(off_t offs)
{
ASSERT_EQ(0, ftruncate(m_test_fd, offs)) << strerror(errno);
ASSERT_EQ(0, ftruncate(m_control_fd, offs)) << strerror(errno);
m_filesize = offs;
}
/*
 * Read size bytes at offset offs from the test file through an mmap'd
 * region, and verify them against a pread() of the control file.
 *
 * mmap(2) requires a page-aligned file offset, so the mapping is
 * extended backward to the containing page boundary and the data of
 * interest starts at pg_offset within the mapping.
 */
void do_mapread(ssize_t size, off_t offs)
{
void *control_buf, *p;
off_t pg_offset, page_mask;
size_t map_size;
/* Round offs down to the start of its page for mmap. */
page_mask = getpagesize() - 1;
pg_offset = offs & page_mask;
map_size = pg_offset + size;
p = mmap(NULL, map_size, PROT_READ, MAP_FILE | MAP_SHARED, m_test_fd,
offs - pg_offset);
ASSERT_NE(p, MAP_FAILED) << strerror(errno);
control_buf = malloc(size);
ASSERT_NE(NULL, control_buf) << strerror(errno);
/* The control file is backed by a regular fd; read it directly. */
ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs))
<< strerror(errno);
compare((void*)((char*)p + pg_offset), control_buf, offs, size);
ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno);
free(control_buf);
}
void do_read(ssize_t size, off_t offs)
{
void *test_buf, *control_buf;
ssize_t r;
test_buf = malloc(size);
ASSERT_NE(NULL, test_buf) << strerror(errno);
control_buf = malloc(size);
ASSERT_NE(NULL, control_buf) << strerror(errno);
ASSERT_EQ(size, pread(m_test_fd, test_buf, size, offs))
<< strerror(errno);
ASSERT_EQ(size, pread(m_control_fd, control_buf, size, offs))
<< strerror(errno);
errno = 0;
r = pread(m_test_fd, test_buf, size, offs);
ASSERT_NE(-1, r) << strerror(errno);
ASSERT_EQ(size, r) << "unexpected short read";
r = pread(m_control_fd, control_buf, size, offs);
ASSERT_NE(-1, r) << strerror(errno);
ASSERT_EQ(size, r) << "unexpected short read";
compare(test_buf, control_buf, offs, size);
@ -204,6 +240,43 @@ void do_read(ssize_t size, off_t offs)
free(test_buf);
}
/*
 * Write size bytes of pseudo-random data at offset offs to the test
 * file through an mmap'd region, and mirror the same write to the
 * control file with pwrite() so later reads can be compared.
 *
 * mmap(2) requires a page-aligned file offset, so the mapping is
 * extended backward to the containing page boundary; the write lands
 * at pg_offset within the mapping.
 */
void do_mapwrite(ssize_t size, off_t offs)
{
char *buf;
void *p;
off_t pg_offset, page_mask;
size_t map_size;
long i;
/* Round offs down to the start of its page for mmap. */
page_mask = getpagesize() - 1;
pg_offset = offs & page_mask;
map_size = pg_offset + size;
buf = (char*)malloc(size);
ASSERT_NE(NULL, buf) << strerror(errno);
for (i=0; i < size; i++)
buf[i] = random();
if (offs + size > m_filesize) {
/*
* Must manually extend. vm_mmap_vnode will not implicitly
* extend a vnode
*/
do_ftruncate(offs + size);
}
p = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
MAP_FILE | MAP_SHARED, m_test_fd, offs - pg_offset);
ASSERT_NE(p, MAP_FAILED) << strerror(errno);
/* Store through the mapping; munmap below flushes the dirty pages. */
bcopy(buf, (char*)p + pg_offset, size);
ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs))
<< strerror(errno);
free(buf);
ASSERT_EQ(0, munmap(p, map_size)) << strerror(errno);
}
void do_write(ssize_t size, off_t offs)
{
char *buf;
@ -218,6 +291,9 @@ void do_write(ssize_t size, off_t offs)
<< strerror(errno);
ASSERT_EQ(size, pwrite(m_control_fd, buf, size, offs))
<< strerror(errno);
m_filesize = std::max(m_filesize, offs + size);
free(buf);
}
};
@ -240,6 +316,18 @@ TEST_P(Io, extend_from_dirty_page)
do_read(rsize, rofs);
}
/*
* mapwrite into a newly extended part of a file.
*
* fsx -c 100 -i 100 -l 524288 -o 131072 -N5 -P /tmp -S19 fsx.bin
*/
TEST_P(Io, extend_by_mapwrite)
{
/* Each mapwrite extends the file past its current EOF. */
do_mapwrite(0x849e, 0x29a3a); /* [0x29a3a, 0x31ed7] */
do_mapwrite(0x3994, 0x3c7d8); /* [0x3c7d8, 0x4016b] */
/* Read back a range spanning both writes, up to the new EOF. */
do_read(0xf556, 0x30c16); /* [0x30c16, 0x4016b] */
}
/*
* When writing the last page of a file, it must be written synchronously.
* Otherwise the cached page can become invalid by a subsequent extend
@ -249,16 +337,20 @@ TEST_P(Io, extend_from_dirty_page)
*/
TEST_P(Io, last_page)
{
off_t wofs0 = 0x1134f;
ssize_t wsize0 = 0xcc77;
off_t wofs1 = 0x2096a;
ssize_t wsize1 = 0xdfa7;
off_t rofs = 0x1a3aa;
ssize_t rsize = 0xb5b7;
do_write(0xcc77, 0x1134f); /* [0x1134f, 0x1dfc5] */
do_write(0xdfa7, 0x2096a); /* [0x2096a, 0x2e910] */
do_read(0xb5b7, 0x1a3aa); /* [0x1a3aa, 0x25960] */
}
do_write(wsize0, wofs0);
do_write(wsize1, wofs1);
do_read(rsize, rofs);
/*
* Read a hole using mmap
*
* fsx -c 100 -i 100 -l 524288 -o 131072 -N11 -P /tmp -S14 fsx.bin
*/
TEST_P(Io, mapread_hole)
{
/* Write past offset 0, leaving a hole at the start of the file. */
do_write(0x123b7, 0xf205); /* [0xf205, 0x215bb] */
/* mmap-read a range that starts inside the hole. */
do_mapread(0xeeea, 0x2f4c); /* [0x2f4c, 0x11e35] */
}
/*