fusefs: evict invalidated cache contents during write-through
fusefs's default cache mode is "writethrough", although it currently works
more like "write-around": writes bypass the cache completely. Since writes
bypass the cache, they were leaving stale previously-read data in the cache.
This commit invalidates that stale data. It also adds a new global
v_inval_buf_range method, like vtruncbuf but for a range of a file.

PR:		235774
Reported by:	cem
Sponsored by:	The FreeBSD Foundation
This commit is contained in:
parent
1f4a83f981
commit
6af6fdcea7
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/projects/fuse2/; revision=346162
@ -171,8 +171,13 @@ fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
|
||||
* cached.
|
||||
*/
|
||||
if (directio || fuse_data_cache_mode == FUSE_CACHE_WT) {
|
||||
off_t start, end;
|
||||
|
||||
SDT_PROBE2(fuse, , io, trace, 1,
|
||||
"direct write of vnode");
|
||||
start = uio->uio_offset;
|
||||
end = start + uio->uio_resid;
|
||||
v_inval_buf_range(vp, start, end, fuse_iosize(vp));
|
||||
err = fuse_write_directbackend(vp, uio, cred, fufh,
|
||||
ioflag);
|
||||
} else {
|
||||
|
@ -116,6 +116,8 @@ static void vfs_knl_assert_locked(void *arg);
|
||||
static void vfs_knl_assert_unlocked(void *arg);
|
||||
static void vnlru_return_batches(struct vfsops *mnt_op);
|
||||
static void destroy_vpollinfo(struct vpollinfo *vi);
|
||||
static int v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
|
||||
daddr_t startlbn, daddr_t endlbn);
|
||||
|
||||
/*
|
||||
* These fences are intended for cases where some synchronization is
|
||||
@ -1865,9 +1867,8 @@ int
|
||||
vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
|
||||
{
|
||||
struct buf *bp, *nbp;
|
||||
int anyfreed;
|
||||
daddr_t trunclbn;
|
||||
struct bufobj *bo;
|
||||
daddr_t startlbn;
|
||||
|
||||
CTR5(KTR_VFS, "%s: vp %p with cred %p and block %d:%ju", __func__,
|
||||
vp, cred, blksize, (uintmax_t)length);
|
||||
@ -1875,62 +1876,15 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
|
||||
/*
|
||||
* Round up to the *next* lbn.
|
||||
*/
|
||||
trunclbn = howmany(length, blksize);
|
||||
startlbn = howmany(length, blksize);
|
||||
|
||||
ASSERT_VOP_LOCKED(vp, "vtruncbuf");
|
||||
|
||||
restart:
|
||||
bo = &vp->v_bufobj;
|
||||
BO_LOCK(bo);
|
||||
anyfreed = 1;
|
||||
for (;anyfreed;) {
|
||||
anyfreed = 0;
|
||||
TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
|
||||
if (bp->b_lblkno < trunclbn)
|
||||
continue;
|
||||
if (BUF_LOCK(bp,
|
||||
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
|
||||
BO_LOCKPTR(bo)) == ENOLCK)
|
||||
goto restart;
|
||||
|
||||
bremfree(bp);
|
||||
bp->b_flags |= (B_INVAL | B_RELBUF);
|
||||
bp->b_flags &= ~B_ASYNC;
|
||||
brelse(bp);
|
||||
anyfreed = 1;
|
||||
|
||||
BO_LOCK(bo);
|
||||
if (nbp != NULL &&
|
||||
(((nbp->b_xflags & BX_VNCLEAN) == 0) ||
|
||||
(nbp->b_vp != vp) ||
|
||||
(nbp->b_flags & B_DELWRI))) {
|
||||
BO_UNLOCK(bo);
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
||||
TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
|
||||
if (bp->b_lblkno < trunclbn)
|
||||
continue;
|
||||
if (BUF_LOCK(bp,
|
||||
LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
|
||||
BO_LOCKPTR(bo)) == ENOLCK)
|
||||
goto restart;
|
||||
bremfree(bp);
|
||||
bp->b_flags |= (B_INVAL | B_RELBUF);
|
||||
bp->b_flags &= ~B_ASYNC;
|
||||
brelse(bp);
|
||||
anyfreed = 1;
|
||||
|
||||
BO_LOCK(bo);
|
||||
if (nbp != NULL &&
|
||||
(((nbp->b_xflags & BX_VNDIRTY) == 0) ||
|
||||
(nbp->b_vp != vp) ||
|
||||
(nbp->b_flags & B_DELWRI) == 0)) {
|
||||
BO_UNLOCK(bo);
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (v_inval_buf_range1(vp, bo, length, INT64_MAX) == EAGAIN)
|
||||
goto restart;
|
||||
|
||||
if (length > 0) {
|
||||
restartsync:
|
||||
@ -1963,6 +1917,113 @@ vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length, int blksize)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Invalidate the cached pages of a file's buffer within the range of block
 * numbers [startlbn, endlbn). Every buffer that overlaps that range will be
 * invalidated.  This must not result in any dirty data being lost.
 *
 * vp must be locked (asserted below); [start, end) is a byte range and
 * blksize is the filesystem block size used to convert it to logical
 * block numbers for the buffer scan.
 */
void
v_inval_buf_range(struct vnode *vp, off_t start, off_t end, int blksize)
{
	struct bufobj *bo;
	daddr_t startlbn, endlbn;

	/*
	 * Round "outwards": truncate the start offset down to a block
	 * boundary and round the end offset up, so every buffer that
	 * overlaps [start, end) falls inside [startlbn, endlbn).
	 */
	startlbn = start / blksize;
	endlbn = howmany(end, blksize);

	ASSERT_VOP_LOCKED(vp, "v_inval_buf_range");

restart:
	bo = &vp->v_bufobj;
	BO_LOCK(bo);

#ifdef INVARIANTS
	struct buf *bp, *nbp;

	/*
	 * NOTE(review): this walks the entire dirty list, so the KASSERT
	 * also fires for dirty buffers lying wholly outside the requested
	 * range, even though v_inval_buf_range1() would never touch them.
	 * The effective contract is therefore "the caller guarantees no
	 * dirty data exists outside [start, end)" — confirm that is the
	 * intent for all callers.
	 */
	TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
		/*
		 * Disallow invalidating dirty data outside of the requested
		 * offsets.  Assume that data within the requested offsets is
		 * being invalidated for a good reason.
		 */
		off_t blkstart, blkend;

		blkstart = bp->b_offset;
		blkend = bp->b_offset + bp->b_bcount;
		KASSERT(blkstart >= start && blkend <= end,
		    ("Invalidating extra dirty data!"));
	}
#endif

	/*
	 * The helper returns EAGAIN whenever it had to drop the bufobj
	 * lock in a way that may have invalidated its list iteration;
	 * in that case restart the whole scan from scratch.
	 */
	if (v_inval_buf_range1(vp, bo, startlbn, endlbn) == EAGAIN)
		goto restart;

	BO_UNLOCK(bo);
	/* Also purge the VM object's pages backing the byte range. */
	vn_pages_remove(vp, OFF_TO_IDX(start), OFF_TO_IDX(end));
}
|
||||
|
||||
/* Like v_inval_buf_range, but operates on whole buffers instead of offsets */
/*
 * Invalidate every clean and dirty buffer of vp whose logical block number
 * lies in [startlbn, endlbn).
 *
 * Locking contract (from the code below): called with the bufobj lock
 * held.  Returns 0 with the bufobj lock still held, or EAGAIN with the
 * lock dropped — either because BUF_LOCK with LK_INTERLOCK released it
 * and then failed with ENOLCK, or via the explicit BO_UNLOCK when the
 * saved next pointer went stale.  On EAGAIN the caller must re-take the
 * lock and restart its scan.
 */
static int
v_inval_buf_range1(struct vnode *vp, struct bufobj *bo,
    daddr_t startlbn, daddr_t endlbn)
{
	struct buf *bp, *nbp;
	int anyfreed;

	/* Keep sweeping both lists until a full pass frees nothing. */
	anyfreed = 1;
	for (;anyfreed;) {
		anyfreed = 0;
		TAILQ_FOREACH_SAFE(bp, &bo->bo_clean.bv_hd, b_bobufs, nbp) {
			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
				continue;
			/*
			 * LK_INTERLOCK hands the bufobj lock to BUF_LOCK,
			 * which releases it; LK_SLEEPFAIL makes a contested
			 * lock fail with ENOLCK after sleeping rather than
			 * proceed on a buffer that may have changed.
			 */
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo)) == ENOLCK)
				return EAGAIN;

			/* Toss the buffer: pull it off the queue and let
			 * brelse() discard it (B_INVAL | B_RELBUF). */
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
			anyfreed = 1;

			BO_LOCK(bo);
			/*
			 * brelse() ran without the bufobj lock, so the
			 * saved next pointer may no longer be a clean
			 * buffer of this vnode; if so, bail out and let
			 * the caller restart.
			 */
			if (nbp != NULL &&
			    (((nbp->b_xflags & BX_VNCLEAN) == 0) ||
			    (nbp->b_vp != vp) ||
			    (nbp->b_flags & B_DELWRI))) {
				BO_UNLOCK(bo);
				return EAGAIN;
			}
		}

		/* Same sweep over the dirty list. */
		TAILQ_FOREACH_SAFE(bp, &bo->bo_dirty.bv_hd, b_bobufs, nbp) {
			if (bp->b_lblkno < startlbn || bp->b_lblkno >= endlbn)
				continue;
			if (BUF_LOCK(bp,
			    LK_EXCLUSIVE | LK_SLEEPFAIL | LK_INTERLOCK,
			    BO_LOCKPTR(bo)) == ENOLCK)
				return EAGAIN;
			bremfree(bp);
			bp->b_flags |= (B_INVAL | B_RELBUF);
			bp->b_flags &= ~B_ASYNC;
			brelse(bp);
			anyfreed = 1;

			BO_LOCK(bo);
			/*
			 * Mirror of the clean-list check: a valid dirty
			 * successor must still be BX_VNDIRTY, on this
			 * vnode, and still delayed-write.
			 */
			if (nbp != NULL &&
			    (((nbp->b_xflags & BX_VNDIRTY) == 0) ||
			    (nbp->b_vp != vp) ||
			    (nbp->b_flags & B_DELWRI) == 0)) {
				BO_UNLOCK(bo);
				return EAGAIN;
			}
		}
	}
	return 0;
}
|
||||
|
||||
static void
|
||||
buf_vlist_remove(struct buf *bp)
|
||||
{
|
||||
|
@ -660,6 +660,8 @@ void vinactive(struct vnode *, struct thread *);
|
||||
int vinvalbuf(struct vnode *vp, int save, int slpflag, int slptimeo);
|
||||
int vtruncbuf(struct vnode *vp, struct ucred *cred, off_t length,
|
||||
int blksize);
|
||||
void v_inval_buf_range(struct vnode *vp, off_t start, off_t end,
|
||||
int blksize);
|
||||
void vunref(struct vnode *);
|
||||
void vn_printf(struct vnode *vp, const char *fmt, ...) __printflike(2,3);
|
||||
int vrecycle(struct vnode *vp);
|
||||
|
@ -228,8 +228,7 @@ TEST_F(Write, append_direct_io)
|
||||
}
|
||||
|
||||
/* A direct write should evict any overlapping cached data */
|
||||
/* https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=235774 */
|
||||
TEST_F(Write, DISABLED_direct_io_evicts_cache)
|
||||
TEST_F(Write, direct_io_evicts_cache)
|
||||
{
|
||||
const char FULLPATH[] = "mountpoint/some_file.txt";
|
||||
const char RELPATH[] = "some_file.txt";
|
||||
@ -409,6 +408,42 @@ TEST_F(Write, DISABLED_mmap)
|
||||
free(zeros);
|
||||
}
|
||||
|
||||
/* In WriteThrough mode, a write should evict overlapping cached data */
/*
 * Regression test for PR 235774: a write-through write used to bypass the
 * buffer cache and leave stale previously-read data behind.  The final
 * expect_read() is the proof: if the cache had NOT been evicted, the second
 * read(2) would be satisfied from cache and the mock FUSE server would
 * never see a second FUSE_READ.
 */
TEST_F(WriteThrough, evicts_read_cache)
{
	const char FULLPATH[] = "mountpoint/some_file.txt";
	const char RELPATH[] = "some_file.txt";
	const char CONTENTS0[] = "abcdefgh";
	const char CONTENTS1[] = "ijklmnop";
	uint64_t ino = 42;
	int fd;
	/* +1 so the NUL terminator travels through the file too */
	ssize_t bufsize = strlen(CONTENTS0) + 1;
	char readbuf[bufsize];

	/* Program the mock FUSE server's expected operations. */
	expect_lookup(RELPATH, ino, bufsize);
	expect_open(ino, 0, 1);
	expect_read(ino, 0, bufsize, bufsize, CONTENTS0);
	expect_write(ino, 0, bufsize, bufsize, 0, CONTENTS1);

	fd = open(FULLPATH, O_RDWR);
	EXPECT_LE(0, fd) << strerror(errno);

	// Prime cache
	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);

	// Write directly, evicting cache
	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
	ASSERT_EQ(bufsize, write(fd, CONTENTS1, bufsize)) << strerror(errno);

	// Read again. Cache should be bypassed
	expect_read(ino, 0, bufsize, bufsize, CONTENTS1);
	ASSERT_EQ(0, lseek(fd, 0, SEEK_SET)) << strerror(errno);
	ASSERT_EQ(bufsize, read(fd, readbuf, bufsize)) << strerror(errno);
	/* Must observe the written data, not the stale CONTENTS0. */
	ASSERT_STREQ(readbuf, CONTENTS1);

	/* Deliberately leak fd. close(2) will be tested in release.cc */
}
|
||||
|
||||
TEST_F(WriteThrough, pwrite)
|
||||
{
|
||||
const char FULLPATH[] = "mountpoint/some_file.txt";
|
||||
|
Loading…
Reference in New Issue
Block a user