Add two new fcntls to enable/disable read-ahead:

 - F_READAHEAD: specify the amount for sequential access.  The amount is
   specified in bytes and is rounded up to the nearest block size.
 - F_RDAHEAD: Darwin compatible version that uses 128KB as the sequential
   access size.

A third argument of zero disables the read-ahead behavior.

Please note that the read-ahead amount is also constrained by the sysctl
variable vfs.read_max, which may need to be raised in order to better
utilize this feature.

Thanks to Igor Sysoev for proposing the feature and submitting the original
version, and to kib@ for his valuable comments.

Submitted by:	Igor Sysoev <is rambler-co ru>
Reviewed by:	kib@
MFC after:	1 month
This commit is contained in:
Xin LI 2009-09-28 16:59:47 +00:00
parent 13dcbd75c1
commit 82aebf697c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=197579
4 changed files with 71 additions and 3 deletions

View File

@ -28,7 +28,7 @@
.\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94
.\" $FreeBSD$
.\"
.Dd March 8, 2008
.Dd September 28, 2009
.Dt FCNTL 2
.Os
.Sh NAME
@ -241,6 +241,22 @@ will be interrupted if the signal handler has not specified the
.Dv SA_RESTART
(see
.Xr sigaction 2 ) .
.It Dv F_READAHEAD
Set or clear the read ahead amount for sequential access to the third
argument,
.Fa arg ,
which is rounded up to the nearest block size.
A zero value in
.Fa arg
turns off read ahead.
.It Dv F_RDAHEAD
Equivalent to the Darwin counterpart, which sets a read ahead amount of 128KB
when the third argument,
.Fa arg ,
is non-zero.
A zero value in
.Fa arg
turns off read ahead.
.El
.Pp
When a shared lock has been set on a segment of a file,

View File

@ -421,6 +421,8 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
struct vnode *vp;
int error, flg, tmp;
int vfslocked;
u_int old, new;
uint64_t bsize;
vfslocked = 0;
error = 0;
@ -686,6 +688,48 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg)
vfslocked = 0;
fdrop(fp, td);
break;
/*
 * F_RDAHEAD: collapse any non-zero arg to a fixed 128KB amount, keep zero
 * as zero, then share the F_READAHEAD handling below.
 */
case F_RDAHEAD:
arg = arg ? 128 * 1024: 0;
/* FALLTHROUGH */
case F_READAHEAD:
/*
 * A non-zero arg (in bytes) stores a block count in f_seqcount and sets
 * FRDAHEAD in f_flag; a zero arg clears FRDAHEAD.  Descriptors that do
 * not resolve, or that are not DTYPE_VNODE, fail with EBADF.
 */
FILEDESC_SLOCK(fdp);
if ((fp = fdtofp(fd, fdp)) == NULL) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
break;
}
if (fp->f_type != DTYPE_VNODE) {
FILEDESC_SUNLOCK(fdp);
error = EBADF;
break;
}
/*
 * Hold fp before the descriptor table lock is dropped; the matching
 * fdrop() is at the end of this case.
 */
fhold(fp);
FILEDESC_SUNLOCK(fdp);
if (arg != 0) {
vp = fp->f_vnode;
vfslocked = VFS_LOCK_GIANT(vp->v_mount);
/* The vnode is locked shared only while f_iosize is read. */
error = vn_lock(vp, LK_SHARED);
if (error != 0)
goto readahead_vnlock_fail;
bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize;
VOP_UNLOCK(vp, 0);
/*
 * Round the byte count up to whole f_iosize blocks.
 * NOTE(review): arg is a signed intptr_t mixed with a uint64_t bsize,
 * and neither a negative arg nor a zero f_iosize is checked here --
 * confirm these cannot occur, or clamp before dividing.
 */
fp->f_seqcount = (arg + bsize - 1) / bsize;
/* Retry until the compare-and-set of f_flag with FRDAHEAD added succeeds. */
do {
new = old = fp->f_flag;
new |= FRDAHEAD;
} while (atomic_cmpset_rel_int(&fp->f_flag, old, new) == 0);
/*
 * vn_lock() failure jumps here with error set; f_seqcount and f_flag
 * are left unmodified in that case.
 */
readahead_vnlock_fail:
VFS_UNLOCK_GIANT(vfslocked);
} else {
/* Retry until the compare-and-set of f_flag with FRDAHEAD removed succeeds. */
do {
new = old = fp->f_flag;
new &= ~FRDAHEAD;
} while (atomic_cmpset_rel_int(&fp->f_flag, old, new) == 0);
}
fdrop(fp, td);
break;
default:
error = EINVAL;
break;

View File

@ -312,6 +312,9 @@ static int
sequential_heuristic(struct uio *uio, struct file *fp)
{
if (atomic_load_acq_int(&(fp->f_flag)) & FRDAHEAD)
return (fp->f_seqcount << IO_SEQSHIFT);
/*
* Offset 0 is handled specially. open() sets f_seqcount to 1 so
* that the first I/O is normally considered to be slightly

View File

@ -140,7 +140,7 @@ typedef __pid_t pid_t;
/* bits to save after open */
#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC)
/* bits settable by fcntl(F_SETFL, ...) */
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT)
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT)
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
@ -151,7 +151,8 @@ typedef __pid_t pid_t;
*/
#define FPOSIXSHM O_NOFOLLOW
#undef FCNTLFLAGS
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|O_DIRECT)
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|FRDAHEAD| \
O_DIRECT)
#endif
#endif
@ -176,6 +177,8 @@ typedef __pid_t pid_t;
* different meaning for fcntl(2).
*/
#if __BSD_VISIBLE
/* Read ahead: defined as the same bit value as O_CREAT */
#define FRDAHEAD O_CREAT
#endif
/* Defined by POSIX Extended API Set Part 2 */
@ -218,6 +221,8 @@ typedef __pid_t pid_t;
#define F_SETLK 12 /* set record locking information */
#define F_SETLKW 13 /* F_SETLK; wait if blocked */
#define F_SETLK_REMOTE 14 /* debugging support for remote locks */
#define F_READAHEAD 15 /* set read ahead amount in bytes; 0 turns it off */
#define F_RDAHEAD 16 /* Darwin compatible read ahead: non-zero arg selects 128KB */
/* file descriptor flags (F_GETFD, F_SETFD) */
#define FD_CLOEXEC 1 /* close-on-exec flag */