Further refine the implementation of POSIX_FADV_NOREUSE.

First, extend the changes in r230782 to better handle the common case
of using NOREUSE with sequential reads.  A NOREUSE file descriptor
will now track the last implicit DONTNEED request it made as a result
of a NOREUSE read.  If a subsequent NOREUSE read is adjacent to the
previous range, it will apply the DONTNEED request to the entire range
of both the previous read and the current read.  The effect is that
each read of a file accessed sequentially will apply the DONTNEED
request to the entire range that has been read.  This allows NOREUSE
to properly handle misaligned reads by flushing each buffer to cache
once it has been completely read.

Second, apply the same changes made to read(2) by r230782 and this
change to writes.  This provides much better performance in the
sequential write case as it allows writes to still be clustered.  It
also provides much better performance for misaligned writes.  It does
mean that NOREUSE will be generally ineffective for non-sequential
writes as the current implementation relies on a future NOREUSE
write's implicit DONTNEED request to flush the dirty buffer from the
current write.

MFC after:	2 weeks
This commit is contained in:
John Baldwin 2012-06-19 18:42:24 +00:00
parent bf7f8bd30a
commit cd4ecf3cd2
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=237274
4 changed files with 94 additions and 16 deletions

View File

@ -28,7 +28,7 @@
.\" @(#)madvise.2 8.1 (Berkeley) 6/9/93
.\" $FreeBSD$
.\"
.Dd February 25, 2012
.Dd June 19, 2012
.Dt POSIX_FADVISE 2
.Os
.Sh NAME
@ -84,10 +84,9 @@ specified range and future access to this data may require a read operation.
.It Dv POSIX_FADV_NOREUSE
Tells the system that the specified data will only be accessed once and
then not reused.
Accesses to data within the specified range are treated as if the file
descriptor has the
.Dv O_DIRECT
flag enabled.
The system may decrease the in-memory priority of data once it has been
read or written.
Future access to this data may require a read operation.
.El
.Sh RETURN VALUES
.Rv -std posix_fadvise

View File

@ -4872,6 +4872,8 @@ kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
new->fa_advice = advice;
new->fa_start = offset;
new->fa_end = end;
new->fa_prevstart = 0;
new->fa_prevend = 0;
fp->f_advice = new;
new = fa;
}

View File

@ -542,7 +542,7 @@ vn_read(fp, uio, active_cred, flags, td)
int error, ioflag;
struct mtx *mtxp;
int advice, vfslocked;
off_t offset;
off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@ -607,9 +607,38 @@ vn_read(fp, uio, active_cred, flags, td)
fp->f_nextoff = uio->uio_offset;
VOP_UNLOCK(vp, 0);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
offset != uio->uio_offset)
error = VOP_ADVISE(vp, offset, uio->uio_offset - 1,
POSIX_FADV_DONTNEED);
offset != uio->uio_offset) {
/*
* Use POSIX_FADV_DONTNEED to flush clean pages and
* buffers for the backing file after a
* POSIX_FADV_NOREUSE read(2). To optimize the common
* case of using POSIX_FADV_NOREUSE with sequential
* access, track the previous implicit DONTNEED
* request and grow this request to include the
* current read(2) in addition to the previous
* DONTNEED. With purely sequential access this will
* cause the DONTNEED requests to continuously grow to
* cover all of the previously read regions of the
* file. This allows filesystem blocks that are
* accessed by multiple calls to read(2) to be flushed
* once the last read(2) finishes.
*/
start = offset;
end = uio->uio_offset - 1;
mtx_lock(mtxp);
if (fp->f_advice != NULL &&
fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
start = fp->f_advice->fa_prevstart;
else if (fp->f_advice->fa_prevstart != 0 &&
fp->f_advice->fa_prevstart == end + 1)
end = fp->f_advice->fa_prevend;
fp->f_advice->fa_prevstart = start;
fp->f_advice->fa_prevend = end;
}
mtx_unlock(mtxp);
error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
}
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
@ -630,6 +659,7 @@ vn_write(fp, uio, active_cred, flags, td)
int error, ioflag, lock_flags;
struct mtx *mtxp;
int advice, vfslocked;
off_t offset, start, end;
KASSERT(uio->uio_td == td, ("uio_td %p is not td %p",
uio->uio_td, td));
@ -664,6 +694,7 @@ vn_write(fp, uio, active_cred, flags, td)
if ((flags & FOF_OFFSET) == 0)
uio->uio_offset = fp->f_offset;
advice = POSIX_FADV_NORMAL;
mtxp = NULL;
if (fp->f_advice != NULL) {
mtxp = mtx_pool_find(mtxpool_sleep, fp);
mtx_lock(mtxp);
@ -676,19 +707,14 @@ vn_write(fp, uio, active_cred, flags, td)
switch (advice) {
case POSIX_FADV_NORMAL:
case POSIX_FADV_SEQUENTIAL:
case POSIX_FADV_NOREUSE:
ioflag |= sequential_heuristic(uio, fp);
break;
case POSIX_FADV_RANDOM:
/* XXX: Is this correct? */
break;
case POSIX_FADV_NOREUSE:
/*
* Request the underlying FS to discard the buffers
* and pages after the I/O is complete.
*/
ioflag |= IO_DIRECT;
break;
}
offset = uio->uio_offset;
#ifdef MAC
error = mac_vnode_check_write(active_cred, fp->f_cred, vp);
@ -701,6 +727,55 @@ vn_write(fp, uio, active_cred, flags, td)
VOP_UNLOCK(vp, 0);
if (vp->v_type != VCHR)
vn_finished_write(mp);
if (error == 0 && advice == POSIX_FADV_NOREUSE &&
offset != uio->uio_offset) {
/*
* Use POSIX_FADV_DONTNEED to flush clean pages and
* buffers for the backing file after a
* POSIX_FADV_NOREUSE write(2). To optimize the
* common case of using POSIX_FADV_NOREUSE with
* sequential access, track the previous implicit
* DONTNEED request and grow this request to include
* the current write(2) in addition to the previous
* DONTNEED. With purely sequential access this will
* cause the DONTNEED requests to continuously grow to
* cover all of the previously written regions of the
* file.
*
* Note that the blocks just written are almost
* certainly still dirty, so this only works when
* VOP_ADVISE() calls from subsequent writes push out
* the data written by this write(2) once the backing
* buffers are clean. However, as compared to forcing
* IO_DIRECT, this gives much saner behavior. Write
* clustering is still allowed, and clean pages are
* merely moved to the cache page queue rather than
* outright thrown away. This means a subsequent
* read(2) can still avoid hitting the disk if the
* pages have not been reclaimed.
*
* This does make POSIX_FADV_NOREUSE largely useless
* with non-sequential access. However, sequential
* access is the more common use case and the flag is
* merely advisory.
*/
start = offset;
end = uio->uio_offset - 1;
mtx_lock(mtxp);
if (fp->f_advice != NULL &&
fp->f_advice->fa_advice == POSIX_FADV_NOREUSE) {
if (start != 0 && fp->f_advice->fa_prevend + 1 == start)
start = fp->f_advice->fa_prevstart;
else if (fp->f_advice->fa_prevstart != 0 &&
fp->f_advice->fa_prevstart == end + 1)
end = fp->f_advice->fa_prevend;
fp->f_advice->fa_prevstart = start;
fp->f_advice->fa_prevend = end;
}
mtx_unlock(mtxp);
error = VOP_ADVISE(vp, start, end, POSIX_FADV_DONTNEED);
}
unlock:
VFS_UNLOCK_GIANT(vfslocked);
return (error);

View File

@ -126,6 +126,8 @@ struct fadvise_info {
int fa_advice; /* (f) FADV_* type. */
off_t fa_start; /* (f) Region start. */
off_t fa_end; /* (f) Region end. */
off_t fa_prevstart; /* (f) Previous NOREUSE start. */
off_t fa_prevend; /* (f) Previous NOREUSE end. */
};
struct file {