MFP4 (with some minor changes):

Implement the linux_io_* syscalls (AIO). They are only enabled if the native
AIO code is available (either compiled into the kernel or loaded as a module)
at the time the functions are used. If the AIO code is not available, the
syscalls fail with ENOSYS.

From the submitter:
---snip---
DESIGN NOTES:

1. Linux permits a process to own multiple AIO queues (distinguished by
   "context"), but FreeBSD creates only a single AIO queue per process.
   My code maintains a request queue (STAILQ of queue(3)) per "context",
   and throws all AIO requests of all contexts owned by a process into
   the single FreeBSD per-process AIO queue.

   When the process calls io_destroy(2), io_getevents(2), io_submit(2) and
   io_cancel(2), my code can pick out requests owned by the specified context
   from the single FreeBSD per-process AIO queue according to the per-context
   request queues maintained by my code.

2. The request queue maintained by my code stores the correspondence between
   Linux IO control blocks (struct linux_iocb) and FreeBSD IO control blocks
   (struct aiocb). The FreeBSD IO control block actually resides in userland
   memory, as required by the native FreeBSD aio_XXXXXX(2) syscalls.

3. It is quite troubling that the function io_getevents() of libaio-0.3.105
   needs to use Linux-specific "struct aio_ring", which is a partial mirror
   of context in user space. I would rather take the address of context in
   kernel as the context ID, but the io_getevents() of libaio forces me to
   take the address of the "ring" in user space as the context ID.

   To my surprise, one comment line in the file "io_getevents.c" of
   libaio-0.3.105 reads:

             Ben will hate me for this

REFERENCE:

1. Linux kernel source code:   http://www.kernel.org/pub/linux/kernel/v2.6/
   (include/linux/aio_abi.h, fs/aio.c)

2. Linux manual pages:         http://www.kernel.org/pub/linux/docs/manpages/
   (io_setup(2), io_destroy(2), io_getevents(2), io_submit(2), io_cancel(2))

3. Linux Scalability Effort:   http://lse.sourceforge.net/io/aio.html
   The design notes:           http://lse.sourceforge.net/io/aionotes.txt

4. The package libaio, both source and binary:
       http://rpmfind.net/linux/rpm2html/search.php?query=libaio
   Simple transparent interface to Linux AIO system calls.

5. Libaio-oracle:              http://oss.oracle.com/projects/libaio-oracle/
   POSIX AIO implementation based on Linux AIO system calls (depending on
   libaio).
---snip---

Submitted by:	Li, Xiao <intron@intron.ac>
This commit is contained in:
Alexander Leidinger 2006-10-15 14:22:14 +00:00
parent 715e675c5f
commit 6a1162d4cd
11 changed files with 1476 additions and 19 deletions

View File

@ -840,4 +840,6 @@ typedef int l_mqd_t;
#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
#include <compat/linux/linux_aio.h>
#endif /* !_AMD64_LINUX_LINUX_H_ */

View File

@ -406,11 +406,11 @@
242 AUE_NULL UNIMPL linux_sched_getaffinity
243 AUE_NULL UNIMPL linux_set_thread_area
244 AUE_NULL UNIMPL linux_get_thread_area
245 AUE_NULL UNIMPL linux_io_setup
246 AUE_NULL UNIMPL linux_io_destroy
247 AUE_NULL UNIMPL linux_io_getevents
248 AUE_NULL UNIMPL linux_io_submit
249 AUE_NULL UNIMPL linux_io_cancel
245 AUE_NULL STD { int linux_io_setup(l_uint nr_reqs, linux_aio_context_t *ctxp); }
246 AUE_NULL STD { int linux_io_destroy(linux_aio_context_t ctx); }
247 AUE_NULL STD { int linux_io_getevents(linux_aio_context_t ctx_id, l_long min_nr, l_long nr, struct linux_io_event *events, struct l_timespec *timeout); }
248 AUE_NULL STD { int linux_io_submit(linux_aio_context_t ctx_id, l_long nr, struct linux_iocb **iocbpp); }
249 AUE_NULL STD { int linux_io_cancel(linux_aio_context_t ctx_id, struct linux_iocb *iocb, struct linux_io_event *result); }
250 AUE_NULL STD { int linux_fadvise64(void); }
251 AUE_NULL UNIMPL
252 AUE_EXIT STD { int linux_exit_group(int error_code); }

1349
sys/compat/linux/linux_aio.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,98 @@
/*-
* Copyright (c) 2006 Li, Xiao <intron@intron.ac>. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Linux Kernel Implementation of Asynchronous I/O
*/
#ifndef _LINUX_AIO_H_
#define _LINUX_AIO_H_
/*
 * Handle naming one Linux AIO context; this is the type of the context
 * arguments of the linux_io_* syscalls.
 *
 * NOTE(review): "unsigned long" is 64 bits wide on amd64, where this header
 * is also pulled in for 32-bit Linux binaries.  Confirm the width is what
 * such binaries expect wherever the handle is copied through a user
 * pointer (e.g. the ctxp argument of linux_io_setup).
 */
typedef unsigned long linux_aio_context_t;
/*
 * Linux IOCB command codes.
 *
 * Every value is assigned explicitly rather than left to automatic
 * numbering.  Codes 4 and 5 are compiled out by the "#if 0" below, so
 * LINUX_IOCB_CMD_PREADX and LINUX_IOCB_CMD_POLL are not defined.
 */
enum {
	LINUX_IOCB_CMD_PREAD	= 0,
	LINUX_IOCB_CMD_PWRITE	= 1,
	LINUX_IOCB_CMD_FSYNC	= 2,
	LINUX_IOCB_CMD_FDSYNC	= 3,
#if 0
	LINUX_IOCB_CMD_PREADX	= 4,
	LINUX_IOCB_CMD_POLL	= 5,
#endif
	LINUX_IOCB_CMD_NOOP	= 6,
};
/*
 * Event record exchanged with Linux userland: four 64-bit fields,
 * 32 bytes in total.  It is the element type of the "events" and
 * "result" arguments of linux_io_getevents and linux_io_cancel.
 *
 * NOTE(review): field meanings are not visible from this header;
 * presumably they follow Linux's struct io_event -- confirm against
 * include/linux/aio_abi.h.
 */
struct linux_io_event {
	uint64_t	data;
	uint64_t	obj;
	int64_t		res;
	int64_t		res2;
};
/*
 * LINUX_AIO_PADDED(x, y) expands to the two member names in an order that
 * depends on the host byte order: "x, y" on little-endian machines and
 * "y, x" on big-endian ones.  It is used below to declare a pair of
 * adjacent 32-bit members.
 */
#if _BYTE_ORDER == _LITTLE_ENDIAN
#define	LINUX_AIO_PADDED(x,y)	x,y
#elif _BYTE_ORDER == _BIG_ENDIAN
#define	LINUX_AIO_PADDED(x,y)	y,x
#else
#error Unidentified byte order !!!
#endif

/*
 * Linux I/O control block, built only from fixed-width members.
 * Pointers to it are taken by linux_io_submit and linux_io_cancel.
 */
struct linux_iocb {
	uint64_t	aio_data;
	/* aio_key and aio_reserved1; order picked by the host byte order. */
	uint32_t	LINUX_AIO_PADDED(aio_key, aio_reserved1);

	uint16_t	aio_lio_opcode;
	int16_t		aio_reqprio;
	uint32_t	aio_fildes;

	uint64_t	aio_buf;
	uint64_t	aio_nbytes;
	int64_t		aio_offset;

	/* TODO: use this for a (struct sigevent *) */
	uint64_t	aio_reserved2;
	uint64_t	aio_reserved3;
};
/* Constants associated with the matching fields of struct linux_aio_ring. */
#define	LINUX_AIO_RING_MAGIC			0xa10a10a1
#define	LINUX_AIO_RING_COMPAT_FEATURES		1
#define	LINUX_AIO_RING_INCOMPAT_FEATURES	0

/*
 * User space context information structure.
 *
 * A fixed header of eight l_uint members followed by a zero-length
 * trailing array of event records.
 */
struct linux_aio_ring {
	l_uint	ring_id;
	l_uint	ring_nr;
	l_uint	ring_head;
	l_uint	ring_tail;
	l_uint	ring_magic;		/* see LINUX_AIO_RING_MAGIC */
	l_uint	ring_compat_features;	/* see LINUX_AIO_RING_COMPAT_FEATURES */
	l_uint	ring_incompat_features;	/* see LINUX_AIO_RING_INCOMPAT_FEATURES */
	l_uint	ring_header_length;	/* Size of this structure */

	struct linux_io_event	ring_io_events[0];
};
#endif /* !_LINUX_AIO_H_ */

View File

@ -240,6 +240,7 @@ amd64/linux32/linux32_locore.s optional compat_linux32 \
amd64/linux32/linux32_machdep.c optional compat_linux32
amd64/linux32/linux32_sysent.c optional compat_linux32
amd64/linux32/linux32_sysvec.c optional compat_linux32
compat/linux/linux_aio.c optional compat_linux32
compat/linux/linux_emul.c optional compat_linux32
compat/linux/linux_file.c optional compat_linux32
compat/linux/linux_futex.c optional compat_linux32

View File

@ -85,6 +85,7 @@ rr232x_lib.o optional rr232x \
#
compat/linprocfs/linprocfs.c optional linprocfs
compat/linsysfs/linsysfs.c optional linsysfs
compat/linux/linux_aio.c optional compat_linux
compat/linux/linux_emul.c optional compat_linux
compat/linux/linux_file.c optional compat_linux
compat/linux/linux_futex.c optional compat_linux

View File

@ -803,4 +803,6 @@ typedef int l_mqd_t;
#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
#include <compat/linux/linux_aio.h>
#endif /* !_I386_LINUX_LINUX_H_ */

View File

@ -409,11 +409,11 @@
242 AUE_NULL UNIMPL linux_sched_getaffinity
243 AUE_NULL STD { int linux_set_thread_area(struct l_user_desc *desc); }
244 AUE_NULL STD { int linux_get_thread_area(struct l_user_desc *desc); }
245 AUE_NULL UNIMPL linux_io_setup
246 AUE_NULL UNIMPL linux_io_destroy
247 AUE_NULL UNIMPL linux_io_getevents
248 AUE_NULL UNIMPL linux_io_submit
249 AUE_NULL UNIMPL linux_io_cancel
245 AUE_NULL STD { int linux_io_setup(l_uint nr_reqs, linux_aio_context_t *ctxp); }
246 AUE_NULL STD { int linux_io_destroy(linux_aio_context_t ctx); }
247 AUE_NULL STD { int linux_io_getevents(linux_aio_context_t ctx_id, l_long min_nr, l_long nr, struct linux_io_event *events, struct l_timespec *timeout); }
248 AUE_NULL STD { int linux_io_submit(linux_aio_context_t ctx_id, l_long nr, struct linux_iocb **iocbpp); }
249 AUE_NULL STD { int linux_io_cancel(linux_aio_context_t ctx_id, struct linux_iocb *iocb, struct linux_io_event *result); }
250 AUE_NULL STD { int linux_fadvise64(void); }
251 AUE_NULL UNIMPL
252 AUE_EXIT STD { int linux_exit_group(int error_code); }

View File

@ -313,12 +313,12 @@ static struct mtx aio_sock_mtx;
static TAILQ_HEAD(,aiocblist) aio_jobs; /* (c) Async job list */
static struct unrhdr *aiod_unr;
static void aio_init_aioinfo(struct proc *p);
void aio_init_aioinfo(struct proc *p);
static void aio_onceonly(void);
static int aio_free_entry(struct aiocblist *aiocbe);
static void aio_process(struct aiocblist *aiocbe);
static int aio_newproc(int *);
static int aio_aqueue(struct thread *td, struct aiocb *job,
int aio_aqueue(struct thread *td, struct aiocb *job,
struct aioliojob *lio, int type, int osigev);
static void aio_physwakeup(struct buf *bp);
static void aio_proc_rundown(void *arg, struct proc *p);
@ -498,7 +498,7 @@ aio_unload(void)
* Init the per-process aioinfo structure. The aioinfo limits are set
* per-process for user limit (resource) management.
*/
static void
void
aio_init_aioinfo(struct proc *p)
{
struct kaioinfo *ki;
@ -1322,7 +1322,7 @@ aio_swake_cb(struct socket *so, struct sockbuf *sb)
* Queue a new AIO request. Choosing either the threaded or direct physio VCHR
* technique is done in this code.
*/
static int
int
aio_aqueue(struct thread *td, struct aiocb *job, struct aioliojob *lj,
int type, int oldsigev)
{

View File

@ -5,4 +5,6 @@
KMOD= aio
SRCS= vfs_aio.c opt_vfs_aio.h vnode_if.h
EXPORT_SYMS= aio_init_aioinfo aio_aqueue
.include <bsd.kmod.mk>

View File

@ -8,11 +8,13 @@ CFLAGS+=-DCOMPAT_IA32 -DCOMPAT_LINUX32
.PATH: ${.CURDIR}/../../compat/linux ${.CURDIR}/../../${MACHINE_ARCH}/linux${SFX}
KMOD= linux
SRCS= linux${SFX}_dummy.c linux_emul.c linux_file.c linux_futex.c linux_getcwd.c linux_ioctl.c \
linux_ipc.c linux${SFX}_machdep.c linux_mib.c linux_misc.c linux_signal.c \
linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c linux${SFX}_sysvec.c \
linux_uid16.c linux_util.c linux_time.c opt_inet6.h opt_mac.h opt_compat.h opt_posix.h \
vnode_if.h device_if.h bus_if.h
SRCS= linux_aio.c linux${SFX}_dummy.c linux_emul.c linux_file.c \
linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \
linux${SFX}_machdep.c linux_mib.c linux_misc.c linux_signal.c \
linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \
linux${SFX}_sysvec.c linux_uid16.c linux_util.c linux_time.c \
opt_inet6.h opt_mac.h opt_compat.h opt_posix.h vnode_if.h \
device_if.h bus_if.h
OBJS= linux${SFX}_locore.o
.if ${MACHINE_ARCH} == "i386"