From b12698e1a1735aa1dedd8e89c2a6e435a7e40681 Mon Sep 17 00:00:00 2001 From: Roman Divacky Date: Wed, 18 Sep 2013 18:48:33 +0000 Subject: [PATCH] Revert r255672, it has some serious flaws, leaking file references etc. Approved by: re (delphij) --- sys/amd64/linux32/linux32_dummy.c | 4 + sys/amd64/linux32/syscalls.master | 10 +- sys/compat/linux/linux_epoll.c | 554 ------------------------------ sys/compat/linux/linux_epoll.h | 68 ---- sys/conf/files.amd64 | 1 - sys/conf/files.i386 | 1 - sys/conf/files.pc98 | 1 - sys/i386/linux/linux_dummy.c | 4 + sys/i386/linux/syscalls.master | 10 +- sys/kern/kern_event.c | 123 +++---- sys/modules/linux/Makefile | 2 +- sys/sys/event.h | 18 - sys/sys/file.h | 2 - sys/sys/syscallsubr.h | 7 - 14 files changed, 69 insertions(+), 736 deletions(-) delete mode 100644 sys/compat/linux/linux_epoll.c delete mode 100644 sys/compat/linux/linux_epoll.h diff --git a/sys/amd64/linux32/linux32_dummy.c b/sys/amd64/linux32/linux32_dummy.c index 1ae64bb0889b..95bf3ec88e9c 100644 --- a/sys/amd64/linux32/linux32_dummy.c +++ b/sys/amd64/linux32/linux32_dummy.c @@ -70,6 +70,9 @@ DUMMY(pivot_root); DUMMY(mincore); DUMMY(ptrace); DUMMY(lookup_dcookie); +DUMMY(epoll_create); +DUMMY(epoll_ctl); +DUMMY(epoll_wait); DUMMY(remap_file_pages); DUMMY(timer_create); DUMMY(timer_settime); @@ -126,6 +129,7 @@ DUMMY(timerfd_gettime); /* linux 2.6.27: */ DUMMY(signalfd4); DUMMY(eventfd2); +DUMMY(epoll_create1); DUMMY(dup3); DUMMY(inotify_init1); /* linux 2.6.30: */ diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index b9a082938777..c3a10afeb0a3 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -430,11 +430,9 @@ 251 AUE_NULL UNIMPL 252 AUE_EXIT STD { int linux_exit_group(int error_code); } 253 AUE_NULL STD { int linux_lookup_dcookie(void); } -254 AUE_NULL STD { int linux_epoll_create(l_int size); } -255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \ - struct linux_epoll_event *event); } -256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \ - l_int maxevents, l_int timeout); } +254 AUE_NULL STD { int linux_epoll_create(void); } +255 AUE_NULL STD { int linux_epoll_ctl(void); } +256 AUE_NULL STD { int linux_epoll_wait(void); } 257 AUE_NULL STD { int linux_remap_file_pages(void); } 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); } 259 AUE_NULL STD { int linux_timer_create(void); } @@ -536,7 +534,7 @@ ; linux 2.6.27: 327 AUE_NULL STD { int linux_signalfd4(void); } 328 AUE_NULL STD { int linux_eventfd2(void); } -329 AUE_NULL STD { int linux_epoll_create1(l_int flags); } +329 AUE_NULL STD { int linux_epoll_create1(void); } 330 AUE_NULL STD { int linux_dup3(void); } 331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); } 332 AUE_NULL STD { int linux_inotify_init1(void); } diff --git a/sys/compat/linux/linux_epoll.c b/sys/compat/linux/linux_epoll.c deleted file mode 100644 index b9e1f2b0923c..000000000000 --- a/sys/compat/linux/linux_epoll.c +++ /dev/null @@ -1,554 +0,0 @@ -/*- - * Copyright (c) 2007 Roman Divacky - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include -__FBSDID("$FreeBSD$"); - -#include "opt_compat.h" -#include "opt_ktrace.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef KTRACE -#include -#endif - -#ifdef COMPAT_LINUX32 -#include -#include -#else -#include -#include -#endif - -#define ktrepoll_events(evt, count) \ - ktrstruct("linux_epoll_event", (evt), count * sizeof(*evt)) - -/* - * epoll defines 'struct epoll_event' with the field 'data' as 64 bits - * on all architectures. But on 32 bit architectures BSD 'struct kevent' only - * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied - * data verbatuim. Therefore on 32 bit architectures we allocate 64-bit memory - * block to pass user supplied data for every file descriptor. - */ -typedef uint64_t epoll_udata_t; -#if defined(__i386__) -#define EPOLL_WIDE_USER_DATA 1 -#else -#define EPOLL_WIDE_USER_DATA 0 -#endif - -#if EPOLL_WIDE_USER_DATA - -/* - * Approach similar to epoll_user_data could also be used to - * keep track of event bits per file descriptor for all architectures. - * However, it isn't obvious that such tracking would be beneficial - * in practice. - */ - -struct epoll_user_data { - unsigned sz; - epoll_udata_t data[1]; -}; -static MALLOC_DEFINE(M_LINUX_EPOLL, "epoll", "memory for epoll system"); -#define EPOLL_USER_DATA_SIZE(ndata) \ - (sizeof(struct epoll_user_data)+((ndata)-1)*sizeof(epoll_udata_t)) -#define EPOLL_USER_DATA_MARGIN 16 - -static void epoll_init_user_data(struct thread *td, struct file *epfp); -static void epoll_set_user_data(struct thread *td, struct file *epfp, int fd, epoll_udata_t user_data); -static epoll_udata_t epoll_get_user_data(struct thread *td, struct file *epfp, int fd); -static fo_close_t epoll_close; - -/* overload kqueue fileops */ -static struct fileops epollops = { - .fo_read = kqueue_read, - .fo_write = kqueue_write, - .fo_truncate = kqueue_truncate, - .fo_ioctl = kqueue_ioctl, - .fo_poll = kqueue_poll, - .fo_kqfilter = kqueue_kqfilter, - .fo_stat = kqueue_stat, - .fo_close = epoll_close, - .fo_chmod = invfo_chmod, - .fo_chown = invfo_chown, - .fo_sendfile = invfo_sendfile, -}; -#endif - -static struct file* epoll_fget(struct thread *td, int epfd); - -struct epoll_copyin_args { - struct kevent *changelist; -}; - -struct epoll_copyout_args { - struct linux_epoll_event *leventlist; - int count; - int error; -#if KTRACE || EPOLL_WIDE_USER_DATA - struct thread *td; -#endif -#if EPOLL_WIDE_USER_DATA - struct file *epfp; -#endif -}; - - -/* Create a new epoll file descriptor. */ - -static int -linux_epoll_create_common(struct thread *td) -{ - struct file *fp; - int error; - - error = kern_kqueue_locked(td, &fp); -#if EPOLL_WIDE_USER_DATA - if (error == 0) { - epoll_init_user_data(td, fp); - fdrop(fp, td); - } -#endif - return (error); -} - -int -linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) -{ - if (args->size <= 0) - return (EINVAL); - /* args->size is unused. Linux just tests it - * and then forgets it as well. */ - - return (linux_epoll_create_common(td)); -} - -int -linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) -{ - int error; - - error = linux_epoll_create_common(td); - - if (!error) { - if (args->flags & LINUX_EPOLL_CLOEXEC) - td->td_proc->p_fd->fd_ofiles[td->td_retval[0]].fde_flags |= UF_EXCLOSE; - if (args->flags & LINUX_EPOLL_NONBLOCK) - linux_msg(td, "epoll_create1 doesn't yet support EPOLL_NONBLOCK flag\n"); - } - - return (error); -} - -/* Structure converting function from epoll to kevent. */ -static int -linux_epoll_to_kevent(struct thread *td, -#if EPOLL_WIDE_USER_DATA - struct file *epfp, -#endif - int fd, struct linux_epoll_event *l_event, int kev_flags, struct kevent *kevent, int *nkevents) -{ - /* flags related to how event is registered */ - if (l_event->events & LINUX_EPOLLONESHOT) - kev_flags |= EV_ONESHOT; - if (l_event->events & LINUX_EPOLLET) { - kev_flags |= EV_CLEAR; - } - - /* flags related to what event is registered */ - if (l_event->events & LINUX_EPOLLIN || - l_event->events & LINUX_EPOLLRDNORM || - l_event->events & LINUX_EPOLLPRI || - l_event->events & LINUX_EPOLLRDHUP) { - EV_SET(kevent++, fd, EVFILT_READ, kev_flags, 0, 0, - (void*)(EPOLL_WIDE_USER_DATA ? 0 : l_event->data)); - ++*nkevents; - } - if (l_event->events & LINUX_EPOLLOUT || - l_event->events & LINUX_EPOLLWRNORM) { - EV_SET(kevent++, fd, EVFILT_WRITE, kev_flags, 0, 0, - (void*)(EPOLL_WIDE_USER_DATA ? 0 : l_event->data)); - ++*nkevents; - } - if (l_event->events & LINUX_EPOLLRDBAND || - l_event->events & LINUX_EPOLLWRBAND || - l_event->events & LINUX_EPOLLHUP || - l_event->events & LINUX_EPOLLMSG || - l_event->events & LINUX_EPOLLWAKEUP || - l_event->events & LINUX_EPOLLERR) { - linux_msg(td, "epoll_ctl doesn't yet support some event flags supplied: 0x%x\n", - l_event->events); - return (EINVAL); - } - -#if EPOLL_WIDE_USER_DATA - epoll_set_user_data(td, epfp, fd, l_event->data); -#endif - return (0); -} - -/* - * Structure converting function from kevent to epoll. In a case - * this is called on error in registration we store the error in - * event->data and pick it up later in linux_epoll_ctl(). - */ -static void -linux_kevent_to_epoll( -#if EPOLL_WIDE_USER_DATA - struct thread *td, struct file *epfp, -#endif - struct kevent *kevent, struct linux_epoll_event *l_event) -{ - if ((kevent->flags & EV_ERROR) == 0) - switch (kevent->filter) { - case EVFILT_READ: - l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; - break; - case EVFILT_WRITE: - l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; - break; - } -#if EPOLL_WIDE_USER_DATA - l_event->data = epoll_get_user_data(td, epfp, kevent->ident); -#else - l_event->data = (epoll_udata_t)kevent->udata; -#endif -} - -/* - * Copyout callback used by kevent. This converts kevent - * events to epoll events and copies them back to the - * userspace. This is also called on error on registering - * of the filter. - */ -static int -epoll_kev_copyout(void *arg, struct kevent *kevp, int count) -{ - struct epoll_copyout_args *args; - struct linux_epoll_event *eep; - int error, i; - - args = (struct epoll_copyout_args*) arg; - eep = malloc(sizeof(*eep) * count, M_TEMP, M_WAITOK | M_ZERO); - - for (i = 0; i < count; i++) - linux_kevent_to_epoll( -#if EPOLL_WIDE_USER_DATA - args->td, args->epfp, -#endif - &kevp[i], &eep[i]); - - error = copyout(eep, args->leventlist, count * sizeof(*eep)); - if (!error) { - args->leventlist += count; - args->count += count; - } else if (!args->error) - args->error = error; - -#ifdef KTRACE - if (KTRPOINT(args->td, KTR_STRUCT)) - ktrepoll_events(eep, count); -#endif - - free(eep, M_TEMP); - return (error); -} - -/* - * Copyin callback used by kevent. This copies already - * converted filters from kernel memory to the kevent - * internal kernel memory. Hence the memcpy instead of - * copyin. - */ -static int -epoll_kev_copyin(void *arg, struct kevent *kevp, int count) -{ - struct epoll_copyin_args *args; - - args = (struct epoll_copyin_args*) arg; - - memcpy(kevp, args->changelist, count * sizeof(*kevp)); - args->changelist += count; - - return (0); -} - -static int -ignore_enoent(int error) { - if (error == ENOENT) - error = 0; - return (error); -} - -static int -delete_event(struct thread *td, struct file *epfp, int fd, int filter) -{ - struct epoll_copyin_args ciargs; - struct kevent kev; - struct kevent_copyops k_ops = { &ciargs, - NULL, - epoll_kev_copyin}; - ciargs.changelist = &kev; - - EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); - return (kern_kevent_locked(td, epfp, 1, 0, &k_ops, NULL)); -} - -static int -delete_all_events(struct thread *td, struct file *epfp, int fd) -{ - /* here we ignore ENONT, because we don't keep track of events here */ - int error1, error2; - - error1 = ignore_enoent(delete_event(td, epfp, fd, EVFILT_READ)); - error2 = ignore_enoent(delete_event(td, epfp, fd, EVFILT_WRITE)); - - /* report any errors we got */ - if (error1) - return (error1); - if (error2) - return (error2); - return (0); -} - -/* - * Load epoll filter, convert it to kevent filter - * and load it into kevent subsystem. - */ -int -linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) -{ - struct file *epfp; - struct epoll_copyin_args ciargs; - struct kevent kev[2]; - struct kevent_copyops k_ops = { &ciargs, - NULL, - epoll_kev_copyin}; - struct linux_epoll_event le; - int kev_flags; - int nchanges = 0; - int error; - - if (args->epfd == args->fd) - return (EINVAL); - - if (args->op != LINUX_EPOLL_CTL_DEL) { - error = copyin(args->event, &le, sizeof(le)); - if (error) - return (error); - } -#ifdef DEBUG - if (ldebug(epoll_ctl)) - printf(ARGS(epoll_ctl,"%i, %i, %i, %u"), args->epfd, args->op, - args->fd, le.events); -#endif -#ifdef KTRACE - if (KTRPOINT(td, KTR_STRUCT) && args->op != LINUX_EPOLL_CTL_DEL) - ktrepoll_events(&le, 1); -#endif - epfp = epoll_fget(td, args->epfd); - - ciargs.changelist = kev; - - switch (args->op) { - case LINUX_EPOLL_CTL_MOD: - /* we don't memorize which events were set for this FD - on this level, so just delete all we could have set: - EVFILT_READ and EVFILT_WRITE, ignoring any errors - */ - error = delete_all_events(td, epfp, args->fd); - if (error) - goto leave; - /* FALLTHROUGH */ - case LINUX_EPOLL_CTL_ADD: - kev_flags = EV_ADD | EV_ENABLE; - break; - case LINUX_EPOLL_CTL_DEL: - /* CTL_DEL means unregister this fd with this epoll */ - error = delete_all_events(td, epfp, args->fd); - goto leave; - default: - error = EINVAL; - goto leave; - } - - error = linux_epoll_to_kevent(td, -#if EPOLL_WIDE_USER_DATA - epfp, -#endif - args->fd, &le, kev_flags, kev, &nchanges); - if (error) - goto leave; - - error = kern_kevent_locked(td, epfp, nchanges, 0, &k_ops, NULL); -leave: - fdrop(epfp, td); - return (error); -} - -/* - * Wait for a filter to be triggered on the epoll file descriptor. */ -int -linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) -{ - struct file *epfp; - struct timespec ts, *tsp; - struct epoll_copyout_args coargs; - struct kevent_copyops k_ops = { &coargs, - epoll_kev_copyout, - NULL}; - int error; - - if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS) - return (EINVAL); - - epfp = epoll_fget(td, args->epfd); - - coargs.leventlist = args->events; - coargs.count = 0; - coargs.error = 0; -#if defined(KTRACE) || EPOLL_WIDE_USER_DATA - coargs.td = td; -#endif -#if EPOLL_WIDE_USER_DATA - coargs.epfp = epfp; -#endif - - if (args->timeout != -1) { - if (args->timeout < 0) { - error = EINVAL; - goto leave; - } - /* Convert from milliseconds to timespec. */ - ts.tv_sec = args->timeout / 1000; - ts.tv_nsec = (args->timeout % 1000) * 1000000; - tsp = &ts; - } else { - tsp = NULL; - } - - error = kern_kevent_locked(td, epfp, 0, args->maxevents, &k_ops, tsp); - if (!error && coargs.error) - error = coargs.error; - - /* - * kern_keven might return ENOMEM which is not expected from epoll_wait. - * Maybe we should translate that but I don't think it matters at all. - */ - - if (!error) - td->td_retval[0] = coargs.count; -leave: - fdrop(epfp, td); - return (error); -} - -#if EPOLL_WIDE_USER_DATA -/* - * we store user_data vector in an unused for kqueue descriptor - * field fvn_epollpriv in struct file. - */ -#define EPOLL_USER_DATA_GET(epfp) \ - ((struct epoll_user_data*)(epfp)->f_vnun.fvn_epollpriv) -#define EPOLL_USER_DATA_SET(epfp, udv) \ - (epfp)->f_vnun.fvn_epollpriv = (udv) - -static void -epoll_init_user_data(struct thread *td, struct file *epfp) -{ - struct epoll_user_data *udv; - - /* override file ops to have our close operation */ - atomic_store_rel_ptr((volatile uintptr_t *)&epfp->f_ops, (uintptr_t)&epollops); - - /* allocate epoll_user_data initially for up to 16 file descriptor values */ - udv = malloc(EPOLL_USER_DATA_SIZE(EPOLL_USER_DATA_MARGIN), M_LINUX_EPOLL, M_WAITOK); - udv->sz = EPOLL_USER_DATA_MARGIN; - EPOLL_USER_DATA_SET(epfp, udv); -} - -static void -epoll_set_user_data(struct thread *td, struct file *epfp, int fd, epoll_udata_t user_data) -{ - struct epoll_user_data *udv = EPOLL_USER_DATA_GET(epfp); - - if (fd >= udv->sz) { - udv = realloc(udv, EPOLL_USER_DATA_SIZE(fd + EPOLL_USER_DATA_MARGIN), M_LINUX_EPOLL, M_WAITOK); - udv->sz = fd + EPOLL_USER_DATA_MARGIN; - EPOLL_USER_DATA_SET(epfp, udv); - } - udv->data[fd] = user_data; -} - -static epoll_udata_t -epoll_get_user_data(struct thread *td, struct file *epfp, int fd) -{ - struct epoll_user_data *udv = EPOLL_USER_DATA_GET(epfp); - if (fd >= udv->sz) - panic("epoll: user data vector is too small"); - - return (udv->data[fd]); -} - -/*ARGSUSED*/ -static int -epoll_close(struct file *epfp, struct thread *td) -{ - /* free user data vector */ - free(EPOLL_USER_DATA_GET(epfp), M_LINUX_EPOLL); - /* over to kqueue parent */ - return (kqueue_close(epfp, td)); -} -#endif - -static struct file* -epoll_fget(struct thread *td, int epfd) -{ - struct file *fp; - cap_rights_t rights; - - if (fget(td, epfd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp) != 0) - panic("epoll: no file object found for kqueue descriptor"); - - return (fp); -} - diff --git a/sys/compat/linux/linux_epoll.h b/sys/compat/linux/linux_epoll.h deleted file mode 100644 index aea4185341df..000000000000 --- a/sys/compat/linux/linux_epoll.h +++ /dev/null @@ -1,68 +0,0 @@ -/*- - * Copyright (c) 2007 Roman Divacky - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _LINUX_EPOLL_H_ -#define _LINUX_EPOLL_H_ - -#ifdef __amd64__ -#define EPOLL_PACKED __packed -#else -#define EPOLL_PACKED -#endif - -struct linux_epoll_event { - uint32_t events; - uint64_t data; -} EPOLL_PACKED; - -#define LINUX_EPOLLIN 0x001 -#define LINUX_EPOLLPRI 0x002 -#define LINUX_EPOLLOUT 0x004 -#define LINUX_EPOLLRDNORM 0x040 -#define LINUX_EPOLLRDBAND 0x080 -#define LINUX_EPOLLWRNORM 0x100 -#define LINUX_EPOLLWRBAND 0x200 -#define LINUX_EPOLLMSG 0x400 -#define LINUX_EPOLLERR 0x008 -#define LINUX_EPOLLHUP 0x010 -#define LINUX_EPOLLRDHUP 0x2000 -#define LINUX_EPOLLWAKEUP 1u<<29 -#define LINUX_EPOLLONESHOT 1u<<30 -#define LINUX_EPOLLET 1u<<31 - -#define LINUX_EPOLL_CTL_ADD 1 -#define LINUX_EPOLL_CTL_DEL 2 -#define LINUX_EPOLL_CTL_MOD 3 - -#define LINUX_EPOLL_CLOEXEC 02000000 -#define LINUX_EPOLL_NONBLOCK 00004000 - -#define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct linux_epoll_event)) - -#endif /* !_LINUX_EPOLL_H_ */ - diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index babfcab4e0ab..c1647d3f61a9 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -467,7 +467,6 @@ amd64/linux32/linux32_support.s optional compat_linux32 \ dependency "linux32_assym.h" amd64/linux32/linux32_sysent.c optional compat_linux32 amd64/linux32/linux32_sysvec.c optional compat_linux32 -compat/linux/linux_epoll.c optional compat_linux32 compat/linux/linux_emul.c optional compat_linux32 compat/linux/linux_file.c optional compat_linux32 compat/linux/linux_fork.c optional compat_linux32 diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 17791a6efb21..24dac5fb8772 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -80,7 +80,6 @@ hptrr_lib.o optional hptrr \ cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs -compat/linux/linux_epoll.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index ee915018e433..a8e60b6f7290 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -41,7 +41,6 @@ ukbdmap.h optional ukbd_dflt_keymap \ cddl/contrib/opensolaris/common/atomic/i386/opensolaris_atomic.S optional zfs compile-with "${ZFS_S}" compat/linprocfs/linprocfs.c optional linprocfs compat/linsysfs/linsysfs.c optional linsysfs -compat/linux/linux_epoll.c optional compat_linux compat/linux/linux_emul.c optional compat_linux compat/linux/linux_file.c optional compat_linux compat/linux/linux_fork.c optional compat_linux diff --git a/sys/i386/linux/linux_dummy.c b/sys/i386/linux/linux_dummy.c index f8526e116075..ab77790c3e5b 100644 --- a/sys/i386/linux/linux_dummy.c +++ b/sys/i386/linux/linux_dummy.c @@ -72,6 +72,9 @@ DUMMY(setfsgid); DUMMY(pivot_root); DUMMY(mincore); DUMMY(lookup_dcookie); +DUMMY(epoll_create); +DUMMY(epoll_ctl); +DUMMY(epoll_wait); DUMMY(remap_file_pages); DUMMY(fstatfs64); DUMMY(mbind); @@ -117,6 +120,7 @@ DUMMY(timerfd_gettime); /* linux 2.6.27: */ DUMMY(signalfd4); DUMMY(eventfd2); +DUMMY(epoll_create1); DUMMY(dup3); DUMMY(inotify_init1); /* linux 2.6.30: */ diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 1f260bdb14d9..bb1716638639 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -432,11 +432,9 @@ 251 AUE_NULL UNIMPL 252 AUE_EXIT STD { int linux_exit_group(int error_code); } 253 AUE_NULL STD { int linux_lookup_dcookie(void); } -254 AUE_NULL STD { int linux_epoll_create(l_int size); } -255 AUE_NULL STD { int linux_epoll_ctl(l_int epfd, l_int op, l_int fd, \ - struct linux_epoll_event *event); } -256 AUE_NULL STD { int linux_epoll_wait(l_int epfd, struct linux_epoll_event *events, \ - l_int maxevents, l_int timeout); } +254 AUE_NULL STD { int linux_epoll_create(void); } +255 AUE_NULL STD { int linux_epoll_ctl(void); } +256 AUE_NULL STD { int linux_epoll_wait(void); } 257 AUE_NULL STD { int linux_remap_file_pages(void); } 258 AUE_NULL STD { int linux_set_tid_address(int *tidptr); } 259 AUE_NULL STD { int linux_timer_create(clockid_t clock_id, \ @@ -546,7 +544,7 @@ ; linux 2.6.27: 327 AUE_NULL STD { int linux_signalfd4(void); } 328 AUE_NULL STD { int linux_eventfd2(void); } -329 AUE_NULL STD { int linux_epoll_create1(l_int flags); } +329 AUE_NULL STD { int linux_epoll_create1(void); } 330 AUE_NULL STD { int linux_dup3(void); } 331 AUE_NULL STD { int linux_pipe2(l_int *pipefds, l_int flags); } 332 AUE_NULL STD { int linux_inotify_init1(void); } diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index f4b6c1931364..85ea78cd6da1 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -107,7 +107,16 @@ static void kqueue_wakeup(struct kqueue *kq); static struct filterops *kqueue_fo_find(int filt); static void kqueue_fo_release(int filt); -struct fileops kqueueops = { +static fo_rdwr_t kqueue_read; +static fo_rdwr_t kqueue_write; +static fo_truncate_t kqueue_truncate; +static fo_ioctl_t kqueue_ioctl; +static fo_poll_t kqueue_poll; +static fo_kqfilter_t kqueue_kqfilter; +static fo_stat_t kqueue_stat; +static fo_close_t kqueue_close; + +static struct fileops kqueueops = { .fo_read = kqueue_read, .fo_write = kqueue_write, .fo_truncate = kqueue_truncate, @@ -294,7 +303,7 @@ filt_fileattach(struct knote *kn) } /*ARGSUSED*/ -int +static int kqueue_kqfilter(struct file *fp, struct knote *kn) { struct kqueue *kq = kn->kn_fp->f_data; @@ -679,7 +688,34 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) int sys_kqueue(struct thread *td, struct kqueue_args *uap) { - return (kern_kqueue(td)); + struct filedesc *fdp; + struct kqueue *kq; + struct file *fp; + int fd, error; + + fdp = td->td_proc->p_fd; + error = falloc(td, &fp, &fd, 0); + if (error) + goto done2; + + /* An extra reference on `fp' has been held for us by falloc(). */ + kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); + mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK); + TAILQ_INIT(&kq->kq_head); + kq->kq_fdp = fdp; + knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); + TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); + + FILEDESC_XLOCK(fdp); + TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); + FILEDESC_XUNLOCK(fdp); + + finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); + fdrop(fp, td); + + td->td_retval[0] = fd; +done2: + return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -780,76 +816,20 @@ kevent_copyin(void *arg, struct kevent *kevp, int count) return (error); } -int -kern_kqueue(struct thread *td) -{ - struct file *fp; - int error; - - error = kern_kqueue_locked(td, &fp); - - fdrop(fp, td); - return (error); -} - -int -kern_kqueue_locked(struct thread *td, struct file **fpp) -{ - struct filedesc *fdp; - struct kqueue *kq; - struct file *fp; - int fd, error; - - fdp = td->td_proc->p_fd; - error = falloc(td, &fp, &fd, 0); - if (error) - return (error); - - /* An extra reference on `fp' has been held for us by falloc(). */ - kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); - mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK); - TAILQ_INIT(&kq->kq_head); - kq->kq_fdp = fdp; - knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); - TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); - - FILEDESC_XLOCK(fdp); - TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); - FILEDESC_XUNLOCK(fdp); - - finit(fp, FREAD | FWRITE, DTYPE_KQUEUE, kq, &kqueueops); - - td->td_retval[0] = fd; - *fpp = fp; - return (0); -} - int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) -{ - struct file *fp; - cap_rights_t rights; - int error; - - if ((error = fget(td, fd, cap_rights_init(&rights, CAP_POST_EVENT), &fp)) != 0) - return (error); - - error = kern_kevent_locked(td, fp, nchanges, nevents, k_ops, timeout); - - fdrop(fp, td); - return (error); -} - -int -kern_kevent_locked(struct thread *td, struct file *fp, int nchanges, int nevents, - struct kevent_copyops *k_ops, const struct timespec *timeout) { struct kevent keva[KQ_NEVENTS]; struct kevent *kevp, *changes; struct kqueue *kq; + struct file *fp; + cap_rights_t rights; int i, n, nerrors, error; + error = fget(td, fd, cap_rights_init(&rights, CAP_POST_EVENT), &fp); + if (error != 0) + return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto done_norel; @@ -892,6 +872,7 @@ kern_kevent_locked(struct thread *td, struct file *fp, int nchanges, int nevents done: kqueue_release(kq, 0); done_norel: + fdrop(fp, td); return (error); } @@ -1545,7 +1526,7 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, * This could be expanded to call kqueue_scan, if desired. */ /*ARGSUSED*/ -int +static int kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { @@ -1553,7 +1534,7 @@ kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred, } /*ARGSUSED*/ -int +static int kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td) { @@ -1561,7 +1542,7 @@ kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred, } /*ARGSUSED*/ -int +static int kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred, struct thread *td) { @@ -1570,7 +1551,7 @@ kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred, } /*ARGSUSED*/ -int +static int kqueue_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct thread *td) { @@ -1618,7 +1599,7 @@ kqueue_ioctl(struct file *fp, u_long cmd, void *data, } /*ARGSUSED*/ -int +static int kqueue_poll(struct file *fp, int events, struct ucred *active_cred, struct thread *td) { @@ -1645,7 +1626,7 @@ kqueue_poll(struct file *fp, int events, struct ucred *active_cred, } /*ARGSUSED*/ -int +static int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, struct thread *td) { @@ -1663,7 +1644,7 @@ kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, } /*ARGSUSED*/ -int +static int kqueue_close(struct file *fp, struct thread *td) { struct kqueue *kq = fp->f_data; diff --git a/sys/modules/linux/Makefile b/sys/modules/linux/Makefile index ce46aa879a93..7ed6e989388f 100644 --- a/sys/modules/linux/Makefile +++ b/sys/modules/linux/Makefile @@ -9,7 +9,7 @@ CFLAGS+=-DCOMPAT_FREEBSD32 -DCOMPAT_LINUX32 KMOD= linux SRCS= linux_fork.c linux${SFX}_dummy.c linux_emul.c linux_file.c \ - linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c linux_epoll.c \ + linux_futex.c linux_getcwd.c linux_ioctl.c linux_ipc.c \ linux${SFX}_machdep.c linux_mib.c linux_misc.c linux_signal.c \ linux_socket.c linux_stats.c linux_sysctl.c linux${SFX}_sysent.c \ linux${SFX}_sysvec.c linux_uid16.c linux_util.c linux_time.c \ diff --git a/sys/sys/event.h b/sys/sys/event.h index 60bced72d2eb..03bd7b90cfa5 100644 --- a/sys/sys/event.h +++ b/sys/sys/event.h @@ -236,9 +236,6 @@ struct proc; struct knlist; struct mtx; struct rwlock; -struct uio; -struct stat; -struct ucred; extern void knote(struct knlist *list, long hint, int lockflags); extern void knote_fork(struct knlist *list, int pid); @@ -264,21 +261,6 @@ extern int kqfd_register(int fd, struct kevent *kev, struct thread *p, extern int kqueue_add_filteropts(int filt, struct filterops *filtops); extern int kqueue_del_filteropts(int filt); -int kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td); -int kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td); -int kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td); -int kqueue_ioctl(struct file *fp, u_long cmd, void *data, - struct ucred *active_cred, struct thread *td); -int kqueue_poll(struct file *fp, int events, struct ucred *active_cred, - struct thread *td); -int kqueue_kqfilter(struct file *fp, struct knote *kn); -int kqueue_stat(struct file *fp, struct stat *st, struct ucred *active_cred, - struct thread *td); -int kqueue_close(struct file *fp, struct thread *td); - #else /* !_KERNEL */ #include diff --git a/sys/sys/file.h b/sys/sys/file.h index b4c1ad4ead60..7b373f0d7091 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -169,8 +169,6 @@ struct file { union { struct cdev_privdata *fvn_cdevpriv; /* (d) Private data for the cdev. */ - void *fvn_epollpriv; - /* (d) Private data for the epoll. */ struct fadvise_info *fvn_advice; } f_vnun; /* diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 92dd8befb7a9..17f2b97db9ce 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -121,13 +121,8 @@ int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data); int kern_jail(struct thread *td, struct jail *j); int kern_jail_get(struct thread *td, struct uio *options, int flags); int kern_jail_set(struct thread *td, struct uio *options, int flags); -int kern_kqueue(struct thread *td); -int kern_kqueue_locked(struct thread *td, struct file **fpp); int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); -int kern_kevent_locked(struct thread *td, struct file *fp, int nchanges, - int nevents, - struct kevent_copyops *k_ops, const struct timespec *timeout); int kern_kldload(struct thread *td, const char *file, int *fileid); int kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat); int kern_kldunload(struct thread *td, int fileid, int flags); @@ -253,8 +248,6 @@ int kern_utimes(struct thread *td, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_utimesat(struct thread *td, int fd, char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); -int kern_utimensat(struct thread *td, int fd, char *path, - enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg); int kern_wait(struct thread *td, pid_t pid, int *status, int options, struct rusage *rup); int kern_wait6(struct thread *td, enum idtype idtype, id_t id, int *status,