diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 8e918bf63cd2..4cc87ae27860 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -85,6 +85,7 @@ MAN+= abort2.2 \ adjtime.2 \ aio_cancel.2 \ aio_error.2 \ + aio_mlock.2 \ aio_read.2 \ aio_return.2 \ aio_suspend.2 \ diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 24f462125513..4f1104e170be 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -379,6 +379,7 @@ FBSD_1.2 { FBSD_1.3 { accept4; + aio_mlock; bindat; cap_fcntls_get; cap_fcntls_limit; diff --git a/lib/libc/sys/aio_mlock.2 b/lib/libc/sys/aio_mlock.2 new file mode 100644 index 000000000000..a0aa23c0d441 --- /dev/null +++ b/lib/libc/sys/aio_mlock.2 @@ -0,0 +1,133 @@ +.\" Copyright (c) 2013 Gleb Smirnoff +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd June 3, 2013 +.Dt AIO_MLOCK 2 +.Os +.Sh NAME +.Nm aio_mlock +.Nd asynchronous +.Xr mlock 2 +operation +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In aio.h +.Ft int +.Fn aio_mlock "struct aiocb *iocb" +.Sh DESCRIPTION +The +.Fn aio_mlock +system call allows the calling process to lock into memory the +physical pages associated with the virtual address range starting at +.Fa iocb->aio_buf +for +.Fa iocb->aio_nbytes +bytes. +The call returns immediately after the locking request has +been enqueued; the operation may or may not have completed at the time +the call returns. +.Pp +The +.Fa iocb +pointer may be subsequently used as an argument to +.Fn aio_return +and +.Fn aio_error +in order to determine return or error status for the enqueued operation +while it is in progress. +.Pp +If the request could not be enqueued (generally due to +.Xr aio 4 +limits), +then the call returns without having enqueued the request. +.Sh RESTRICTIONS +The Asynchronous I/O Control Block structure pointed to by +.Fa iocb +and the buffer that the +.Fa iocb->aio_buf +member of that structure references must remain valid until the +operation has completed. +For this reason, use of auto (stack) variables +for these objects is discouraged. +.Pp +The asynchronous I/O control buffer +.Fa iocb +should be zeroed before the +.Fn aio_mlock +call to avoid passing bogus context information to the kernel. +.Pp +Modifications of the Asynchronous I/O Control Block structure or the +buffer contents after the request has been enqueued, but before the +request has completed, are not allowed. +.Sh RETURN VALUES +.Rv -std aio_mlock +.Sh ERRORS +The +.Fn aio_read +system call will fail if: +.Bl -tag -width Er +.It Bq Er EAGAIN +The request was not queued because of system resource limitations. +.It Bq Er ENOSYS +The +.Fn aio_mlock +system call is not supported. +.El +.Pp +If the request is successfully enqueued, but subsequently cancelled +or an error occurs, the value returned by the +.Fn aio_return +system call is per the +.Xr mlock 2 +system call, and the value returned by the +.Fn aio_error +system call is one of the error returns from the +.Xr mlock 2 +system call, or +.Er ECANCELED +if the request was explicitly cancelled via a call to +.Fn aio_cancel . +.Sh SEE ALSO +.Xr aio_cancel 2 , +.Xr aio_error 2 , +.Xr aio_return 2 , +.Xr aio 4 , +.Xr mlock 2 +.Sh PORTABILITY +The +.Fn aio_mlock +system call is a +.Fx +extension, and should not be used in portable code. +.Sh HISTORY +The +.Fn aio_mlock +system call first appeared in +.Fx 10.0 . +.Sh AUTHORS +The system call was introduced by +.An Gleb Smirnoff Aq glebius@FreeBSD.org . diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index e537bc9b9255..bcca754b28b6 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -1044,3 +1044,5 @@ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE NOPROTO { int pipe2(int *fildes, int flags); } +543 AUE_NULL NOSTD { int freebsd32_aio_mlock( \ + struct aiocb32 *aiocbp); } diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index 316f0c2becd9..789f95a5b4dd 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -977,5 +977,6 @@ __socklen_t * __restrict anamelen, \ int flags); } 542 AUE_PIPE STD { int pipe2(int *fildes, int flags); } +543 AUE_NULL NOSTD { int aio_mlock(struct aiocb *aiocbp); } ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index 208901e2852f..5fb9341b9911 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -340,6 +340,7 @@ static int aio_onceonly(void); static int aio_free_entry(struct aiocblist *aiocbe); static void aio_process_rw(struct aiocblist *aiocbe); static void aio_process_sync(struct aiocblist *aiocbe); +static void aio_process_mlock(struct aiocblist *aiocbe); static int aio_newproc(int *); int aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lio, int type, struct aiocb_ops *ops); @@ -426,6 +427,7 @@ static struct syscall_helper_data aio_syscalls[] = { SYSCALL_INIT_HELPER(aio_cancel), SYSCALL_INIT_HELPER(aio_error), SYSCALL_INIT_HELPER(aio_fsync), + SYSCALL_INIT_HELPER(aio_mlock), SYSCALL_INIT_HELPER(aio_read), SYSCALL_INIT_HELPER(aio_return), SYSCALL_INIT_HELPER(aio_suspend), @@ -453,6 +455,7 @@ static struct syscall_helper_data aio32_syscalls[] = { SYSCALL32_INIT_HELPER(freebsd32_aio_cancel), SYSCALL32_INIT_HELPER(freebsd32_aio_error), SYSCALL32_INIT_HELPER(freebsd32_aio_fsync), + SYSCALL32_INIT_HELPER(freebsd32_aio_mlock), SYSCALL32_INIT_HELPER(freebsd32_aio_read), SYSCALL32_INIT_HELPER(freebsd32_aio_write), SYSCALL32_INIT_HELPER(freebsd32_aio_waitcomplete), @@ -702,7 +705,8 @@ aio_free_entry(struct aiocblist *aiocbe) * at open time, but this is already true of file descriptors in * a multithreaded process. */ - fdrop(aiocbe->fd_file, curthread); + if (aiocbe->fd_file) + fdrop(aiocbe->fd_file, curthread); crfree(aiocbe->cred); uma_zfree(aiocb_zone, aiocbe); AIO_LOCK(ki); @@ -967,6 +971,21 @@ aio_process_sync(struct aiocblist *aiocbe) td->td_ucred = td_savedcred; } +static void +aio_process_mlock(struct aiocblist *aiocbe) +{ + struct aiocb *cb = &aiocbe->uaiocb; + int error; + + KASSERT(aiocbe->uaiocb.aio_lio_opcode == LIO_MLOCK, + ("%s: opcode %d", __func__, aiocbe->uaiocb.aio_lio_opcode)); + + error = vm_mlock(aiocbe->userproc, aiocbe->cred, + __DEVOLATILE(void *, cb->aio_buf), cb->aio_nbytes); + cb->_aiocb_private.error = error; + cb->_aiocb_private.status = 0; +} + static void aio_bio_done_notify(struct proc *userp, struct aiocblist *aiocbe, int type) { @@ -1143,6 +1162,9 @@ aio_daemon(void *_id) case LIO_SYNC: aio_process_sync(aiocbe); break; + case LIO_MLOCK: + aio_process_mlock(aiocbe); + break; } mtx_lock(&aio_job_mtx); @@ -1283,7 +1305,7 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) cb = &aiocbe->uaiocb; fp = aiocbe->fd_file; - if (fp->f_type != DTYPE_VNODE) + if (fp == NULL || fp->f_type != DTYPE_VNODE) return (-1); vp = fp->f_vnode; @@ -1635,6 +1657,9 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, case LIO_SYNC: error = fget(td, fd, CAP_FSYNC, &fp); break; + case LIO_MLOCK: + fp = NULL; + break; case LIO_NOP: error = fget(td, fd, CAP_NONE, &fp); break; @@ -1692,7 +1717,8 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, error = kqfd_register(kqfd, &kev, td, 1); aqueue_fail: if (error) { - fdrop(fp, td); + if (fp) + fdrop(fp, td); uma_zfree(aiocb_zone, aiocbe); ops->store_error(job, error); goto done; @@ -1709,7 +1735,7 @@ aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj, if (opcode == LIO_SYNC) goto queueit; - if (fp->f_type == DTYPE_SOCKET) { + if (fp && fp->f_type == DTYPE_SOCKET) { /* * Alternate queueing for socket ops: Reach down into the * descriptor to get the socket data. Then check to see if the @@ -2187,6 +2213,13 @@ sys_aio_write(struct thread *td, struct aio_write_args *uap) return (aio_aqueue(td, uap->aiocbp, NULL, LIO_WRITE, &aiocb_ops)); } +int +sys_aio_mlock(struct thread *td, struct aio_mlock_args *uap) +{ + + return (aio_aqueue(td, uap->aiocbp, NULL, LIO_MLOCK, &aiocb_ops)); +} + static int kern_lio_listio(struct thread *td, int mode, struct aiocb * const *uacb_list, struct aiocb **acb_list, int nent, struct sigevent *sig, @@ -2928,6 +2961,14 @@ freebsd32_aio_write(struct thread *td, struct freebsd32_aio_write_args *uap) &aiocb32_ops)); } +int +freebsd32_aio_mlock(struct thread *td, struct freebsd32_aio_mlock_args *uap) +{ + + return (aio_aqueue(td, (struct aiocb *)uap->aiocbp, NULL, LIO_MLOCK, + &aiocb32_ops)); +} + int freebsd32_aio_waitcomplete(struct thread *td, struct freebsd32_aio_waitcomplete_args *uap) diff --git a/sys/sys/aio.h b/sys/sys/aio.h index 5a8779b32eea..d6ef6aaf701b 100644 --- a/sys/sys/aio.h +++ b/sys/sys/aio.h @@ -37,6 +37,7 @@ #define LIO_READ 0x2 #ifdef _KERNEL #define LIO_SYNC 0x3 +#define LIO_MLOCK 0x4 #endif /* @@ -124,6 +125,11 @@ int aio_cancel(int, struct aiocb *); */ int aio_suspend(const struct aiocb * const[], int, const struct timespec *); +/* + * Asynchronous mlock + */ +int aio_mlock(struct aiocb *); + #ifdef __BSD_VISIBLE int aio_waitcomplete(struct aiocb **, struct timespec *); #endif