fusefs: evict invalidated cache contents during write-through

fusefs's default cache mode is "writethrough", although it currently works
more like "write-around": writes bypass the cache completely, so they were
leaving stale, previously-read data in the cache.  This commit invalidates
that stale data.  It also adds a new global v_inval_buf_range method, like
vtruncbuf but for an arbitrary range of a file.

PR:		235774
Reported by:	cem
Sponsored by:	The FreeBSD Foundation
commit 6af6fdcea7 (parent 1f4a83f981)
Author: Alan Somers
Date:   2019-04-12 19:05:06 +00:00
Notes:  svn2git (2020-12-20 02:59:44 +00:00): svn path=/projects/fuse2/; revision=346162

4 changed files with 158 additions and 55 deletions
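
Before the per-file hunks, a minimal sketch of how a filesystem's direct-write
path could use the v_inval_buf_range() interface added by this change.  It
mirrors the fuse_io_dispatch() hunk below; the function name
example_direct_write() and its argument list are assumptions made for
illustration only, not code from this commit.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/vnode.h>

/*
 * Illustrative sketch (not part of the commit): evict cached buffers and
 * pages that overlap a write that will bypass the buffer cache, so that a
 * later cached read cannot return stale data.
 */
static int
example_direct_write(struct vnode *vp, struct uio *uio, int blksize)
{
        off_t start, end;

        /* v_inval_buf_range() requires the vnode lock to be held. */
        ASSERT_VOP_LOCKED(vp, "example_direct_write");

        start = uio->uio_offset;
        end = start + uio->uio_resid;

        /* Drop cached buffers and pages overlapping [start, end). */
        v_inval_buf_range(vp, start, end, blksize);

        /* ... then issue the write directly to the backing store ... */
        return (0);
}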


@@ -171,8 +171,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
* cached.
*/
if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) {
off_t start, end;
SDT_PROBE2(fuse, , io, trace, 1,
"direct write of vnode");
start = uio->uio_offset;
end = start + uio->uio_resid;
v_inval_buf_range(vp, start, end, fuse_iosize(vp));
err = fuse_write_directbackend(vp, uio, cred, fufh,
ioflag);
} else {


@@ -116,6 +116,8 @@ static void vfs_knl_assert_locked(void *arg);
static void vfs_knl_assert_unlocked(void *arg);
static void vnlru_return_batches(struct vfsops *mnt_op);
static void destroy_vpollinfo(struct vpollinfo *vi);
static int v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
daddr_t startlbn, daddr_t endlbn);
/*
* These fences are intended for cases where some synchronization is
@@ -1865,9 +1867,8 @@ int
vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
{
struct buf *bp, *nbp;
int anyfreed;
daddr_t trunclbn;
struct bufobj *bo;
daddr_t startlbn;
CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__,
vp, cred, blksize, (uintmax_t)length);
@@ -1875,62 +1876,15 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
/*
* Round up to the *next* lbn.
*/
trunclbn = howmany(length, blksize);
startlbn = howmany(length, blksize);
ASSERT_VOP_LOCKED(vp, "vtruncbuf");
restart:
bo = &vp->v_bufobj;
BO_LOCK(bo);
anyfreed = 1;
for (;anyfreed;) {
anyfreed = 0;
TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
if (bp->b_lblkno < trunclbn)
continue;
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
BO_LOCKPTR(bo)) == ENOLCK)
goto restart;
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF);
bp->b_flags &= ~B_ASYNC;
brelse(bp);
anyfreed = 1;
BO_LOCK(bo);
if (nbp != NULL &&
(((nbp->b_xflags & BX_VNCLEAN) == 0) ||
(nbp->b_vp != vp) ||
(nbp->b_flags & B_DELWRI))) {
BO_UNLOCK(bo);
goto restart;
}
}
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (bp->b_lblkno < trunclbn)
continue;
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
BO_LOCKPTR(bo)) == ENOLCK)
goto restart;
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF);
bp->b_flags &= ~B_ASYNC;
brelse(bp);
anyfreed = 1;
BO_LOCK(bo);
if (nbp != NULL &&
(((nbp->b_xflags & BX_VNDIRTY) == 0) ||
(nbp->b_vp != vp) ||
(nbp->b_flags & B_DELWRI) == 0)) {
BO_UNLOCK(bo);
goto restart;
}
}
}
if (v_inval_buf_range1(vp, bo, length, INT64_MAX) == EAGAIN)
goto restart;
if (length > 0) {
restartsync:
@@ -1963,6 +1917,113 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
return (0);
}
/*
* Invalidate the cached pages of a file's buffer within the range of block
* numbers [startlbn, endlbn). Every buffer that overlaps that range will be
* invalidated. This must not result in any dirty data being lost.
*/
void
v_inval_buf_range(struct vnode *vp, off_t start, off_t end, int blksize)
{
struct bufobj *bo;
daddr_t startlbn, endlbn;
/* Round "outwards" */
startlbn = start / blksize;
endlbn = howmany(end, blksize);
ASSERT_VOP_LOCKED(vp, "v_inval_buf_range");
restart:
bo = &vp->v_bufobj;
BO_LOCK(bo);
#ifdef INVARIANTS
struct buf *bp, *nbp;
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
/*
* Disallow invalidating dirty data outside of the requested
* offsets. Assume that data within the requested offsets is
* being invalidated for a good reason.
*/
off_t blkstart, blkend;
blkstart = bp->b_offset;
blkend = bp->b_offset + bp->b_bcount;
KASSERT(blkstart >= start && blkend <= end,
("Invalidating extra dirty data!"));
}
#endif
if (v_inval_buf_range1(vp, bo, startlbn, endlbn) == EAGAIN)
goto restart;
BO_UNLOCK(bo);
vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end));
}
/* Like v_inval_buf_range, but operates on whole buffers instead of offsets */
static int
v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
daddr_t startlbn, daddr_t endlbn)
{
struct buf *bp, *nbp;
int anyfreed;
anyfreed = 1;
for (;anyfreed;) {
anyfreed = 0;
TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
continue;
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
BO_LOCKPTR(bo)) == ENOLCK)
return EAGAIN;
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF);
bp->b_flags &= ~B_ASYNC;
brelse(bp);
anyfreed = 1;
BO_LOCK(bo);
if (nbp != NULL &&
(((nbp->b_xflags & BX_VNCLEAN) == 0) ||
(nbp->b_vp != vp) ||
(nbp->b_flags & B_DELWRI))) {
BO_UNLOCK(bo);
return EAGAIN;
}
}
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
continue;
if (BUF_LOCK(bp,
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
BO_LOCKPTR(bo)) == ENOLCK)
return EAGAIN;
bremfree(bp);
bp->b_flags |= (B_INVAL | B_RELBUF);
bp->b_flags &= ~B_ASYNC;
brelse(bp);
anyfreed = 1;
BO_LOCK(bo);
if (nbp != NULL &&
(((nbp->b_xflags & BX_VNDIRTY) == 0) ||
(nbp->b_vp != vp) ||
(nbp->b_flags & B_DELWRI) == 0)) {
BO_UNLOCK(bo);
return EAGAIN;
}
}
}
return 0;
}
static void
buf_vlist_remove(struct buf *bp)
{
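
As a side note (not part of the commit), the 'Round "outwards"' step in
v_inval_buf_range() above can be illustrated with assumed numbers; blksize,
start, and end below are arbitrary example values:

/* Worked example of mapping a byte range to logical block numbers. */
off_t start = 1000, end = 9000;
int blksize = 4096;
daddr_t startlbn = start / blksize;      /* 1000 / 4096          = 0 */
daddr_t endlbn = howmany(end, blksize);  /* (9000 + 4095) / 4096 = 3 */
/* Every buffer overlapping [1000, 9000), i.e. lbns 0 through 2, is evicted. */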


@@ -660,6 +660,8 @@ void vinactive(struct vnode *, struct thread *);
int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
int blksize);
void v_inval_buf_range(struct vnode *vp, off_t start, off_t end,
int blksize);
void vunref(struct vnode *);
void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
int vrecycle(struct vnode *vp);


@@ -228,8 +228,7 @@ TEST_F(Write, append_direct_io)
}
/* A direct write should evict any overlapping cached data */
/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235774 */
TEST_F(Write, DISABLED_direct_io_evicts_cache)
TEST_F(Write, direct_io_evicts_cache)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
@@ -409,6 +408,42 @@ TEST_F(Write, DISABLED_mmap)
free(zeros);
}
/* In WriteThrough mode, a write should evict overlapping cached data */
TEST_F(WriteThrough, evicts_read_cache)
{
const char FULLPATH[] = "mountpoint/some_file.txt";
const char RELPATH[] = "some_file.txt";
const char CONTENTS0[] = "abcdefgh";
const char CONTENTS1[] = "ijklmnop";
uint64_t ino = 42;
int fd;
ssize_t bufsize = strlen(CONTENTS0) + 1;
char readbuf[bufsize];
expect_lookup(RELPATH, ino, bufsize);
expect_open(ino, 0, 1);
expect_read(ino, 0, bufsize, bufsize, CONTENTS0);
expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS1);
fd = open(FULLPATH, O_RDWR);
EXPECT_LE(0, fd) << strerror(errno);
// Prime cache
ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
// Write directly, evicting cache
ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno);
// Read again. Cache should be bypassed
expect_read(ino, 0, bufsize, bufsize, CONTENTS1);
ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
ASSERT_STREQ(readbuf, CONTENTS1);
/* Deliberately leak fd. close(2) will be tested in release.cc */
}
TEST_F(WriteThrough, pwrite)
{
const char FULLPATH[] = "mountpoint/some_file.txt";