From 82aebf697c07deba5b2bc37550b1671d4f48bdab Mon Sep 17 00:00:00 2001 From: Xin LI Date: Mon, 28 Sep 2009 16:59:47 +0000 Subject: [PATCH] Add two new fcntls to enable/disable read-ahead: - F_READAHEAD: specify the amount for sequential access. The amount is specified in bytes and is rounded up to nearest block size. - F_RDAHEAD: Darwin compatible version that use 128KB as the sequential access size. A third argument of zero disables the read-ahead behavior. Please note that the read-ahead amount is also constrainted by sysctl variable, vfs.read_max, which may need to be raised in order to better utilize this feature. Thanks Igor Sysoev for proposing the feature and submitting the original version, and kib@ for his valuable comments. Submitted by: Igor Sysoev Reviewed by: kib@ MFC after: 1 month --- lib/libc/sys/fcntl.2 | 18 ++++++++++++++++- sys/kern/kern_descrip.c | 44 +++++++++++++++++++++++++++++++++++++++++ sys/kern/vfs_vnops.c | 3 +++ sys/sys/fcntl.h | 9 +++++++-- 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/lib/libc/sys/fcntl.2 b/lib/libc/sys/fcntl.2 index a16724c9e0b8..250cef46d5d1 100644 --- a/lib/libc/sys/fcntl.2 +++ b/lib/libc/sys/fcntl.2 @@ -28,7 +28,7 @@ .\" @(#)fcntl.2 8.2 (Berkeley) 1/12/94 .\" $FreeBSD$ .\" -.Dd March 8, 2008 +.Dd September 28, 2009 .Dt FCNTL 2 .Os .Sh NAME @@ -241,6 +241,22 @@ will be interrupted if the signal handler has not specified the .Dv SA_RESTART (see .Xr sigaction 2 ) . +.It Dv F_READAHEAD +Set or clear the read ahead amount for sequential access to the third +argument, +.Fa arg , +which is rounded up to the nearest block size. +A zero value in +.Fa arg +turns off read ahead. +.It Dv F_RDAHEAD +Equivalent to Darwin counterpart which sets read ahead amount of 128KB +when the third argument, +.Fa arg +is non-zero. +A zero value in +.Fa arg +turns off read ahead. .El .Pp When a shared lock has been set on a segment of a file, diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 91733db35d74..434f54a4dd8a 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -421,6 +421,8 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) struct vnode *vp; int error, flg, tmp; int vfslocked; + u_int old, new; + uint64_t bsize; vfslocked = 0; error = 0; @@ -686,6 +688,48 @@ kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg) vfslocked = 0; fdrop(fp, td); break; + + case F_RDAHEAD: + arg = arg ? 128 * 1024: 0; + /* FALLTHROUGH */ + case F_READAHEAD: + FILEDESC_SLOCK(fdp); + if ((fp = fdtofp(fd, fdp)) == NULL) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } + if (fp->f_type != DTYPE_VNODE) { + FILEDESC_SUNLOCK(fdp); + error = EBADF; + break; + } + fhold(fp); + FILEDESC_SUNLOCK(fdp); + if (arg != 0) { + vp = fp->f_vnode; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_lock(vp, LK_SHARED); + if (error != 0) + goto readahead_vnlock_fail; + bsize = fp->f_vnode->v_mount->mnt_stat.f_iosize; + VOP_UNLOCK(vp, 0); + fp->f_seqcount = (arg + bsize - 1) / bsize; + do { + new = old = fp->f_flag; + new |= FRDAHEAD; + } while (atomic_cmpset_rel_int(&fp->f_flag, old, new) == 0); +readahead_vnlock_fail: + VFS_UNLOCK_GIANT(vfslocked); + } else { + do { + new = old = fp->f_flag; + new &= ~FRDAHEAD; + } while (atomic_cmpset_rel_int(&fp->f_flag, old, new) == 0); + } + fdrop(fp, td); + break; + default: error = EINVAL; break; diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 1b77352b1c6a..03e8d938bac5 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -312,6 +312,9 @@ static int sequential_heuristic(struct uio *uio, struct file *fp) { + if (atomic_load_acq_int(&(fp->f_flag)) & FRDAHEAD) + return (fp->f_seqcount << IO_SEQSHIFT); + /* * Offset 0 is handled specially. open() sets f_seqcount to 1 so * that the first I/O is normally considered to be slightly diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 893bbcd677c0..ef394a3fe660 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -140,7 +140,7 @@ typedef __pid_t pid_t; /* bits to save after open */ #define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT|FEXEC) /* bits settable by fcntl(F_SETFL, ...) */ -#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT) +#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FRDAHEAD|O_DIRECT) #if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \ defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) @@ -151,7 +151,8 @@ typedef __pid_t pid_t; */ #define FPOSIXSHM O_NOFOLLOW #undef FCNTLFLAGS -#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|O_DIRECT) +#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|FRDAHEAD| \ + O_DIRECT) #endif #endif @@ -176,6 +177,8 @@ typedef __pid_t pid_t; * different meaning for fcntl(2). */ #if __BSD_VISIBLE +/* Read ahead */ +#define FRDAHEAD O_CREAT #endif /* Defined by POSIX Extended API Set Part 2 */ @@ -218,6 +221,8 @@ typedef __pid_t pid_t; #define F_SETLK 12 /* set record locking information */ #define F_SETLKW 13 /* F_SETLK; wait if blocked */ #define F_SETLK_REMOTE 14 /* debugging support for remote locks */ +#define F_READAHEAD 15 /* read ahead */ +#define F_RDAHEAD 16 /* Darwin compatible read ahead */ /* file descriptor flags (F_GETFD, F_SETFD) */ #define FD_CLOEXEC 1 /* close-on-exec flag */