freebsd-skq/sys/kern/syscalls.master

1000 lines
44 KiB
Plaintext
Raw Normal View History

1999-08-28 01:08:13 +00:00
$FreeBSD$
1994-08-02 07:55:43 +00:00
; from: @(#)syscalls.master 8.2 (Berkeley) 1/13/94
;
1994-05-24 10:09:53 +00:00
; System call name/number master file.
; Processed to create init_sysent.c, syscalls.c and syscall.h.
; Columns: number audit type name alt{name,tag,rtyp}/comments
1994-05-24 10:09:53 +00:00
; number system call number, must be in order
; audit the audit event associated with the system call
; A value of AUE_NULL means no auditing, but it also means that
; there is no audit event for the call at this time. For the
; case where the event exists, but we don't want auditing, the
; event should be #defined to AUE_NULL in audit_kevents.h.
; type one of STD, OBSOL, UNIMPL, COMPAT, COMPAT4, COMPAT6,
; COMPAT7, NODEF, NOARGS, NOPROTO, NOSTD, NOTSTATIC
; The COMPAT* options may be combined with one or more NO*
; options separated by '|' with no spaces (e.g. COMPAT|NOARGS)
; name pseudo-prototype of syscall routine
; If one of the following alts is different, then all appear:
1994-05-24 10:09:53 +00:00
; altname name of system call if different
; alttag name of args struct tag if different from [o]`name'"_args"
; altrtyp return type if not int (bogus - syscalls always return int)
1994-05-24 10:09:53 +00:00
; for UNIMPL/OBSOL, name continues with comments
; types:
; STD always included
; COMPAT included on COMPAT #ifdef
; COMPAT4 included on COMPAT4 #ifdef (FreeBSD 4 compat)
2009-06-22 20:12:40 +00:00
; COMPAT6 included on COMPAT6 #ifdef (FreeBSD 6 compat)
; COMPAT7 included on COMPAT7 #ifdef (FreeBSD 7 compat)
1994-05-24 10:09:53 +00:00
; OBSOL obsolete, not included in system, only specifies name
; UNIMPL not implemented, placeholder only
; NOSTD implemented but as a lkm that can be statically
; compiled in; sysent entry will be filled with lkmressys
; so the SYSCALL_MODULE macro works
; NOARGS same as STD except do not create structure in sys/sysproto.h
; NODEF same as STD except only have the entry in the syscall table
; added. Meaning - do not create structure or function
; prototype in sys/sysproto.h
; NOPROTO same as STD except do not create structure or
; function prototype in sys/sysproto.h. Does add a
; definition to syscall.h besides adding a sysent.
; NOTSTATIC syscall is loadable
;
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master
1994-05-24 10:09:53 +00:00
; #ifdef's, etc. may be included, and are copied to the output files.
#include <sys/param.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
1994-05-24 10:09:53 +00:00
; Reserved/unimplemented system calls in the range 0-150 inclusive
; are reserved for use in future Berkeley releases.
; Additional system calls implemented in vendor and other
; redistributions should be placed in the reserved range at the end
; of the current calls.
; The fields after each prototype below are the alt columns described in the
; key at the top of this file: altname, alttag (args struct tag) and altrtyp
; (return type). Entry 0's system call name is `syscall' and entry 1's is
; `exit', each differing from the name of its routine.
0 AUE_NULL STD { int nosys(void); } syscall nosys_args int
1 AUE_EXIT STD { void sys_exit(int rval); } exit \
sys_exit_args void
2 AUE_FORK STD { int fork(void); }
3 AUE_NULL STD { ssize_t read(int fd, void *buf, \
size_t nbyte); }
4 AUE_NULL STD { ssize_t write(int fd, const void *buf, \
size_t nbyte); }
5 AUE_OPEN_RWTC STD { int open(char *path, int flags, int mode); }
; XXX should be { int open(const char *path, int flags, ...); }
; but we're not ready for `const' or varargs.
; XXX man page says `mode_t mode'.
6 AUE_CLOSE STD { int close(int fd); }
7 AUE_WAIT4 STD { int wait4(int pid, int *status, \
int options, struct rusage *rusage); }
8 AUE_CREAT COMPAT { int creat(char *path, int mode); }
9 AUE_LINK STD { int link(char *path, char *link); }
10 AUE_UNLINK STD { int unlink(char *path); }
11 AUE_NULL OBSOL execv
12 AUE_CHDIR STD { int chdir(char *path); }
13 AUE_FCHDIR STD { int fchdir(int fd); }
14 AUE_MKNOD STD { int mknod(char *path, int mode, int dev); }
15 AUE_CHMOD STD { int chmod(char *path, int mode); }
16 AUE_CHOWN STD { int chown(char *path, int uid, int gid); }
17 AUE_NULL STD { int obreak(char *nsize); } break \
obreak_args int
18 AUE_GETFSSTAT COMPAT4 { int getfsstat(struct ostatfs *buf, \
long bufsize, int flags); }
19 AUE_LSEEK COMPAT { long lseek(int fd, long offset, \
int whence); }
20 AUE_GETPID STD { pid_t getpid(void); }
21 AUE_MOUNT STD { int mount(char *type, char *path, \
int flags, caddr_t data); }
; XXX `path' should have type `const char *' but we're not ready for that.
22 AUE_UMOUNT STD { int unmount(char *path, int flags); }
23 AUE_SETUID STD { int setuid(uid_t uid); }
24 AUE_GETUID STD { uid_t getuid(void); }
25 AUE_GETEUID STD { uid_t geteuid(void); }
26 AUE_PTRACE STD { int ptrace(int req, pid_t pid, \
caddr_t addr, int data); }
27 AUE_RECVMSG STD { int recvmsg(int s, struct msghdr *msg, \
int flags); }
28 AUE_SENDMSG STD { int sendmsg(int s, struct msghdr *msg, \
int flags); }
29 AUE_RECVFROM STD { int recvfrom(int s, caddr_t buf, \
size_t len, int flags, \
struct sockaddr * __restrict from, \
__socklen_t * __restrict fromlenaddr); }
30 AUE_ACCEPT STD { int accept(int s, \
struct sockaddr * __restrict name, \
__socklen_t * __restrict anamelen); }
31 AUE_GETPEERNAME STD { int getpeername(int fdes, \
struct sockaddr * __restrict asa, \
__socklen_t * __restrict alen); }
32 AUE_GETSOCKNAME STD { int getsockname(int fdes, \
struct sockaddr * __restrict asa, \
__socklen_t * __restrict alen); }
33 AUE_ACCESS STD { int access(char *path, int amode); }
34 AUE_CHFLAGS STD { int chflags(const char *path, u_long flags); }
35 AUE_FCHFLAGS STD { int fchflags(int fd, u_long flags); }
36 AUE_SYNC STD { int sync(void); }
37 AUE_KILL STD { int kill(int pid, int signum); }
38 AUE_STAT COMPAT { int stat(char *path, struct ostat *ub); }
39 AUE_GETPPID STD { pid_t getppid(void); }
40 AUE_LSTAT COMPAT { int lstat(char *path, struct ostat *ub); }
41 AUE_DUP STD { int dup(u_int fd); }
42 AUE_PIPE STD { int pipe(void); }
43 AUE_GETEGID STD { gid_t getegid(void); }
44 AUE_PROFILE STD { int profil(caddr_t samples, size_t size, \
size_t offset, u_int scale); }
45 AUE_KTRACE STD { int ktrace(const char *fname, int ops, \
int facs, int pid); }
46 AUE_SIGACTION COMPAT { int sigaction(int signum, \
struct osigaction *nsa, \
struct osigaction *osa); }
47 AUE_GETGID STD { gid_t getgid(void); }
48 AUE_SIGPROCMASK COMPAT { int sigprocmask(int how, osigset_t mask); }
; XXX note nonstandard (bogus) calling convention - the libc stub passes
; us the mask, not a pointer to it, and we return the old mask as the
; (int) return value.
49 AUE_GETLOGIN STD { int getlogin(char *namebuf, u_int \
namelen); }
50 AUE_SETLOGIN STD { int setlogin(char *namebuf); }
51 AUE_ACCT STD { int acct(char *path); }
52 AUE_SIGPENDING COMPAT { int sigpending(void); }
53 AUE_SIGALTSTACK STD { int sigaltstack(stack_t *ss, \
stack_t *oss); }
54 AUE_IOCTL STD { int ioctl(int fd, u_long com, \
caddr_t data); }
55 AUE_REBOOT STD { int reboot(int opt); }
56 AUE_REVOKE STD { int revoke(char *path); }
57 AUE_SYMLINK STD { int symlink(char *path, char *link); }
58 AUE_READLINK STD { ssize_t readlink(char *path, char *buf, \
size_t count); }
59 AUE_EXECVE STD { int execve(char *fname, char **argv, \
char **envv); }
60 AUE_UMASK STD { int umask(int newmask); } umask umask_args \
int
61 AUE_CHROOT STD { int chroot(char *path); }
62 AUE_FSTAT COMPAT { int fstat(int fd, struct ostat *sb); }
63 AUE_NULL COMPAT { int getkerninfo(int op, char *where, \
size_t *size, int arg); } getkerninfo \
getkerninfo_args int
64 AUE_NULL COMPAT { int getpagesize(void); } getpagesize \
getpagesize_args int
65 AUE_MSYNC STD { int msync(void *addr, size_t len, \
int flags); }
66 AUE_VFORK STD { int vfork(void); }
67 AUE_NULL OBSOL vread
68 AUE_NULL OBSOL vwrite
69 AUE_SBRK STD { int sbrk(int incr); }
70 AUE_SSTK STD { int sstk(int incr); }
71 AUE_MMAP COMPAT { int mmap(void *addr, int len, int prot, \
int flags, int fd, long pos); }
72 AUE_O_VADVISE STD { int ovadvise(int anom); } vadvise \
ovadvise_args int
73 AUE_MUNMAP STD { int munmap(void *addr, size_t len); }
74 AUE_MPROTECT STD { int mprotect(const void *addr, size_t len, \
int prot); }
75 AUE_MADVISE STD { int madvise(void *addr, size_t len, \
int behav); }
76 AUE_NULL OBSOL vhangup
77 AUE_NULL OBSOL vlimit
78 AUE_MINCORE STD { int mincore(const void *addr, size_t len, \
char *vec); }
79 AUE_GETGROUPS STD { int getgroups(u_int gidsetsize, \
gid_t *gidset); }
80 AUE_SETGROUPS STD { int setgroups(u_int gidsetsize, \
gid_t *gidset); }
81 AUE_GETPGRP STD { int getpgrp(void); }
82 AUE_SETPGRP STD { int setpgid(int pid, int pgid); }
83 AUE_SETITIMER STD { int setitimer(u_int which, struct \
itimerval *itv, struct itimerval *oitv); }
84 AUE_WAIT4 COMPAT { int wait(void); }
85 AUE_SWAPON STD { int swapon(char *name); }
86 AUE_GETITIMER STD { int getitimer(u_int which, \
struct itimerval *itv); }
87 AUE_SYSCTL COMPAT { int gethostname(char *hostname, \
u_int len); } gethostname \
gethostname_args int
88 AUE_SYSCTL COMPAT { int sethostname(char *hostname, \
u_int len); } sethostname \
sethostname_args int
89 AUE_GETDTABLESIZE STD { int getdtablesize(void); }
90 AUE_DUP2 STD { int dup2(u_int from, u_int to); }
91 AUE_NULL UNIMPL getdopt
92 AUE_FCNTL STD { int fcntl(int fd, int cmd, long arg); }
; XXX should be { int fcntl(int fd, int cmd, ...); }
; but we're not ready for varargs.
93 AUE_SELECT STD { int select(int nd, fd_set *in, fd_set *ou, \
fd_set *ex, struct timeval *tv); }
94 AUE_NULL UNIMPL setdopt
95 AUE_FSYNC STD { int fsync(int fd); }
96 AUE_SETPRIORITY STD { int setpriority(int which, int who, \
int prio); }
97 AUE_SOCKET STD { int socket(int domain, int type, \
int protocol); }
98 AUE_CONNECT STD { int connect(int s, caddr_t name, \
int namelen); }
99 AUE_ACCEPT COMPAT|NOARGS { int accept(int s, caddr_t name, \
int *anamelen); } accept accept_args int
100 AUE_GETPRIORITY STD { int getpriority(int which, int who); }
101 AUE_SEND COMPAT { int send(int s, caddr_t buf, int len, \
int flags); }
102 AUE_RECV COMPAT { int recv(int s, caddr_t buf, int len, \
int flags); }
103 AUE_SIGRETURN COMPAT { int sigreturn( \
struct osigcontext *sigcntxp); }
104 AUE_BIND STD { int bind(int s, caddr_t name, \
int namelen); }
105 AUE_SETSOCKOPT STD { int setsockopt(int s, int level, int name, \
caddr_t val, int valsize); }
106 AUE_LISTEN STD { int listen(int s, int backlog); }
107 AUE_NULL OBSOL vtimes
108 AUE_NULL COMPAT { int sigvec(int signum, struct sigvec *nsv, \
struct sigvec *osv); }
109 AUE_NULL COMPAT { int sigblock(int mask); }
110 AUE_NULL COMPAT { int sigsetmask(int mask); }
111 AUE_NULL COMPAT { int sigsuspend(osigset_t mask); }
; XXX note nonstandard (bogus) calling convention - the libc stub passes
; us the mask, not a pointer to it.
112 AUE_NULL COMPAT { int sigstack(struct sigstack *nss, \
struct sigstack *oss); }
113 AUE_RECVMSG COMPAT { int recvmsg(int s, struct omsghdr *msg, \
int flags); }
114 AUE_SENDMSG COMPAT { int sendmsg(int s, caddr_t msg, \
int flags); }
115 AUE_NULL OBSOL vtrace
116 AUE_GETTIMEOFDAY STD { int gettimeofday(struct timeval *tp, \
struct timezone *tzp); }
117 AUE_GETRUSAGE STD { int getrusage(int who, \
struct rusage *rusage); }
118 AUE_GETSOCKOPT STD { int getsockopt(int s, int level, int name, \
caddr_t val, int *avalsize); }
119 AUE_NULL UNIMPL resuba (BSD/OS 2.x)
120 AUE_READV STD { int readv(int fd, struct iovec *iovp, \
u_int iovcnt); }
121 AUE_WRITEV STD { int writev(int fd, struct iovec *iovp, \
u_int iovcnt); }
122 AUE_SETTIMEOFDAY STD { int settimeofday(struct timeval *tv, \
struct timezone *tzp); }
123 AUE_FCHOWN STD { int fchown(int fd, int uid, int gid); }
124 AUE_FCHMOD STD { int fchmod(int fd, int mode); }
125 AUE_RECVFROM COMPAT|NOARGS { int recvfrom(int s, caddr_t buf, \
size_t len, int flags, caddr_t from, int \
*fromlenaddr); } recvfrom recvfrom_args \
int
126 AUE_SETREUID STD { int setreuid(int ruid, int euid); }
127 AUE_SETREGID STD { int setregid(int rgid, int egid); }
128 AUE_RENAME STD { int rename(char *from, char *to); }
129 AUE_TRUNCATE COMPAT { int truncate(char *path, long length); }
130 AUE_FTRUNCATE COMPAT { int ftruncate(int fd, long length); }
131 AUE_FLOCK STD { int flock(int fd, int how); }
132 AUE_MKFIFO STD { int mkfifo(char *path, int mode); }
133 AUE_SENDTO STD { int sendto(int s, caddr_t buf, size_t len, \
int flags, caddr_t to, int tolen); }
134 AUE_SHUTDOWN STD { int shutdown(int s, int how); }
135 AUE_SOCKETPAIR STD { int socketpair(int domain, int type, \
int protocol, int *rsv); }
136 AUE_MKDIR STD { int mkdir(char *path, int mode); }
137 AUE_RMDIR STD { int rmdir(char *path); }
138 AUE_UTIMES STD { int utimes(char *path, \
struct timeval *tptr); }
139 AUE_NULL OBSOL 4.2 sigreturn
140 AUE_ADJTIME STD { int adjtime(struct timeval *delta, \
struct timeval *olddelta); }
141 AUE_GETPEERNAME COMPAT { int getpeername(int fdes, caddr_t asa, \
int *alen); }
142 AUE_SYSCTL COMPAT { long gethostid(void); }
143 AUE_SYSCTL COMPAT { int sethostid(long hostid); }
144 AUE_GETRLIMIT COMPAT { int getrlimit(u_int which, struct \
orlimit *rlp); }
145 AUE_SETRLIMIT COMPAT { int setrlimit(u_int which, \
struct orlimit *rlp); }
146 AUE_KILLPG COMPAT { int killpg(int pgid, int signum); }
147 AUE_SETSID STD { int setsid(void); }
148 AUE_QUOTACTL STD { int quotactl(char *path, int cmd, int uid, \
caddr_t arg); }
149 AUE_O_QUOTA COMPAT { int quota(void); }
150 AUE_GETSOCKNAME COMPAT|NOARGS { int getsockname(int fdec, \
caddr_t asa, int *alen); } getsockname \
getsockname_args int
1994-05-24 10:09:53 +00:00
; Syscalls 151-180 inclusive are reserved for vendor-specific
; system calls. (This includes various calls added for compatibility
; with other Unix variants.)
; Some of these calls are now supported by BSD...
151 AUE_NULL UNIMPL sem_lock (BSD/OS 2.x)
152 AUE_NULL UNIMPL sem_wakeup (BSD/OS 2.x)
153 AUE_NULL UNIMPL asyncdaemon (BSD/OS 2.x)
Add the new kernel-mode NFS Lock Manager. To use it instead of the user-mode lock manager, build a kernel with the NFSLOCKD option and add '-k' to 'rpc_lockd_flags' in rc.conf. Highlights include: * Thread-safe kernel RPC client - many threads can use the same RPC client handle safely with replies being de-multiplexed at the socket upcall (typically driven directly by the NIC interrupt) and handed off to whichever thread matches the reply. For UDP sockets, many RPC clients can share the same socket. This allows the use of a single privileged UDP port number to talk to an arbitrary number of remote hosts. * Single-threaded kernel RPC server. Adding support for multi-threaded server would be relatively straightforward and would follow approximately the Solaris KPI. A single thread should be sufficient for the NLM since it should rarely block in normal operation. * Kernel mode NLM server supporting cancel requests and granted callbacks. I've tested the NLM server reasonably extensively - it passes both my own tests and the NFS Connectathon locking tests running on Solaris, Mac OS X and Ubuntu Linux. * Userland NLM client supported. While the NLM server doesn't have support for the local NFS client's locking needs, it does have to field async replies and granted callbacks from remote NLMs that the local client has contacted. We relay these replies to the userland rpc.lockd over a local domain RPC socket. * Robust deadlock detection for the local lock manager. In particular it will detect deadlocks caused by a lock request that covers more than one blocking request. As required by the NLM protocol, all deadlock detection happens synchronously - a user is guaranteed that if a lock request isn't rejected immediately, the lock will eventually be granted. The old system allowed for a 'deferred deadlock' condition where a blocked lock request could wake up and find that some other deadlock-causing lock owner had beaten them to the lock. 
* Since both local and remote locks are managed by the same kernel locking code, local and remote processes can safely use file locks for mutual exclusion. Local processes have no fairness advantage compared to remote processes when contending to lock a region that has just been unlocked - the local lock manager enforces a strict first-come first-served model for both local and remote lockers. Sponsored by: Isilon Systems PR: 95247 107555 115524 116679 MFC after: 2 weeks
2008-03-26 15:23:12 +00:00
; Both 154 and 155 are NOSTD: per the type key at the top of this file they
; are implemented as an lkm that can be statically compiled in, and the
; sysent entry is filled with lkmressys so the SYSCALL_MODULE macro works.
; 154 is initialized by the NLM code, if present.
154 AUE_NULL NOSTD { int nlm_syscall(int debug_level, int grace_period, int addr_count, char **addrs); }
; 155 is initialized by the NFS code, if present.
155 AUE_NFS_SVC NOSTD { int nfssvc(int flag, caddr_t argp); }
156 AUE_GETDIRENTRIES COMPAT { int getdirentries(int fd, char *buf, \
u_int count, long *basep); }
157 AUE_STATFS COMPAT4 { int statfs(char *path, \
struct ostatfs *buf); }
158 AUE_FSTATFS COMPAT4 { int fstatfs(int fd, \
struct ostatfs *buf); }
159 AUE_NULL UNIMPL nosys
160 AUE_LGETFH STD { int lgetfh(char *fname, \
struct fhandle *fhp); }
161 AUE_NFS_GETFH STD { int getfh(char *fname, \
struct fhandle *fhp); }
162 AUE_SYSCTL COMPAT4 { int getdomainname(char *domainname, \
int len); }
163 AUE_SYSCTL COMPAT4 { int setdomainname(char *domainname, \
int len); }
164 AUE_NULL COMPAT4 { int uname(struct utsname *name); }
165 AUE_SYSARCH STD { int sysarch(int op, char *parms); }
166 AUE_RTPRIO STD { int rtprio(int function, pid_t pid, \
struct rtprio *rtp); }
167 AUE_NULL UNIMPL nosys
168 AUE_NULL UNIMPL nosys
; semsys, msgsys and shmsys are NOSTD: per the type key at the top of this
; file they are implemented as an lkm that can be statically compiled in, and
; their sysent entries are filled with lkmressys. Each is declared with
; `which' plus a fixed run of int parameters (a2..aN) in place of the
; variadic prototype that the XXX notes say it should have.
169 AUE_SEMSYS NOSTD { int semsys(int which, int a2, int a3, \
int a4, int a5); }
; XXX should be { int semsys(int which, ...); }
170 AUE_MSGSYS NOSTD { int msgsys(int which, int a2, int a3, \
int a4, int a5, int a6); }
; XXX should be { int msgsys(int which, ...); }
171 AUE_SHMSYS NOSTD { int shmsys(int which, int a2, int a3, \
int a4); }
; XXX should be { int shmsys(int which, ...); }
172 AUE_NULL UNIMPL nosys
; pread and pwrite are COMPAT6 entries (included on the COMPAT6 #ifdef,
; FreeBSD 6 compat). Both carry an explicit `int pad' argument between
; nbyte and the off_t offset.
; NOTE(review): the pad presumably keeps the off_t aligned in the argument
; list -- confirm against the COMPAT6 handlers before relying on this.
173 AUE_PREAD COMPAT6 { ssize_t pread(int fd, void *buf, \
size_t nbyte, int pad, off_t offset); }
174 AUE_PWRITE COMPAT6 { ssize_t pwrite(int fd, \
const void *buf, \
size_t nbyte, int pad, off_t offset); }
Add code to allow the system to handle multiple routing tables. This particular implementation is designed to be fully backwards compatible and to be MFC-able to 7.x (and 6.x) Currently the only protocol that can make use of the multiple tables is IPv4 Similar functionality exists in OpenBSD and Linux. From my notes: ----- One thing where FreeBSD has been falling behind, and which by chance I have some time to work on is "policy based routing", which allows different packet streams to be routed by more than just the destination address. Constraints: ------------ I want to make some form of this available in the 6.x tree (and by extension 7.x) , but FreeBSD in general needs it so I might as well do it in -current and back port the portions I need. One of the ways that this can be done is to have the ability to instantiate multiple kernel routing tables (which I will now refer to as "Forwarding Information Bases" or "FIBs" for political correctness reasons). Which FIB a particular packet uses to make the next hop decision can be decided by a number of mechanisms. The policies these mechanisms implement are the "Policies" referred to in "Policy based routing". One of the constraints I have if I try to back port this work to 6.x is that it must be implemented as a EXTENSION to the existing ABIs in 6.x so that third party applications do not need to be recompiled in timespan of the branch. This first version will not have some of the bells and whistles that will come with later versions. It will, for example, be limited to 16 tables in the first commit. Implementation method, Compatible version. (part 1) ------------------------------- For this reason I have implemented a "sufficient subset" of a multiple routing table solution in Perforce, and back-ported it to 6.x. (also in Perforce though not always caught up with what I have done in -current/P4). 
The subset allows a number of FIBs to be defined at compile time (8 is sufficient for my purposes in 6.x) and implements the changes needed to allow IPV4 to use them. I have not done the changes for ipv6 simply because I do not need it, and I do not have enough knowledge of ipv6 (e.g. neighbor discovery) needed to do it. Other protocol families are left untouched and should there be users with proprietary protocol families, they should continue to work and be oblivious to the existence of the extra FIBs. To understand how this is done, one must know that the current FIB code starts everything off with a single dimensional array of pointers to FIB head structures (One per protocol family), each of which in turn points to the trie of routes available to that family. The basic change in the ABI compatible version of the change is to extent that array to be a 2 dimensional array, so that instead of protocol family X looking at rt_tables[X] for the table it needs, it looks at rt_tables[Y][X] when for all protocol families except ipv4 Y is always 0. Code that is unaware of the change always just sees the first row of the table, which of course looks just like the one dimensional array that existed before. The entry points rtrequest(), rtalloc(), rtalloc1(), rtalloc_ign() are all maintained, but refer only to the first row of the array, so that existing callers in proprietary protocols can continue to do the "right thing". Some new entry points are added, for the exclusive use of ipv4 code called in_rtrequest(), in_rtalloc(), in_rtalloc1() and in_rtalloc_ign(), which have an extra argument which refers the code to the correct row. In addition, there are some new entry points (currently called rtalloc_fib() and friends) that check the Address family being looked up and call either rtalloc() (and friends) if the protocol is not IPv4 forcing the action to row 0 or to the appropriate row if it IS IPv4 (and that info is available). 
These are for calling from code that is not specific to any particular protocol. The way these are implemented would change in the non ABI preserving code to be added later. One feature of the first version of the code is that for ipv4, the interface routes show up automatically on all the FIBs, so that no matter what FIB you select you always have the basic direct attached hosts available to you. (rtinit() does this automatically). You CAN delete an interface route from one FIB should you want to but by default it's there. ARP information is also available in each FIB. It's assumed that the same machine would have the same MAC address, regardless of which FIB you are using to get to it. This brings us as to how the correct FIB is selected for an outgoing IPV4 packet. Firstly, all packets have a FIB associated with them. if nothing has been done to change it, it will be FIB 0. The FIB is changed in the following ways. Packets fall into one of a number of classes. 1/ locally generated packets, coming from a socket/PCB. Such packets select a FIB from a number associated with the socket/PCB. This in turn is inherited from the process, but can be changed by a socket option. The process in turn inherits it on fork. I have written a utility call setfib that acts a bit like nice.. setfib -3 ping target.example.com # will use fib 3 for ping. It is an obvious extension to make it a property of a jail but I have not done so. It can be achieved by combining the setfib and jail commands. 2/ packets received on an interface for forwarding. By default these packets would use table 0, (or possibly a number settable in a sysctl(not yet)). but prior to routing the firewall can inspect them (see below). (possibly in the future you may be able to associate a FIB with packets received on an interface.. An ifconfig arg, but not yet.) 3/ packets inspected by a packet classifier, which can arbitrarily associate a fib with it on a packet by packet basis. 
A fib assigned to a packet by a packet classifier (such as ipfw) would over-ride a fib associated by a more default source. (such as cases 1 or 2). 4/ a tcp listen socket associated with a fib will generate accept sockets that are associated with that same fib. 5/ Packets generated in response to some other packet (e.g. reset or icmp packets). These should use the FIB associated with the packet being reponded to. 6/ Packets generated during encapsulation. gif, tun and other tunnel interfaces will encapsulate using the FIB that was in effect withthe proces that set up the tunnel. thus setfib 1 ifconfig gif0 [tunnel instructions] will set the fib for the tunnel to use to be fib 1. Routing messages would be associated with their process, and thus select one FIB or another. messages from the kernel would be associated with the fib they refer to and would only be received by a routing socket associated with that fib. (not yet implemented) In addition Netstat has been edited to be able to cope with the fact that the array is now 2 dimensional. (It looks in system memory using libkvm (!)). Old versions of netstat see only the first FIB. In addition two sysctls are added to give: a) the number of FIBs compiled in (active) b) the default FIB of the calling process. Early testing experience: ------------------------- Basically our (IronPort's) appliance does this functionality already using ipfw fwd but that method has some drawbacks. For example, It can't fully simulate a routing table because it can't influence the socket's choice of local address when a connect() is done. Testing during the generating of these changes has been remarkably smooth so far. Multiple tables have co-existed with no notable side effects, and packets have been routes accordingly. ipfw has grown 2 new keywords: setfib N ip from anay to any count ip from any to any fib N In pf there seems to be a requirement to be able to give symbolic names to the fibs but I do not have that capacity. 
I am not sure if it is required. SCTP has interestingly enough built in support for this, called VRFs in Cisco parlance. it will be interesting to see how that handles it when it suddenly actually does something. Where to next: -------------------- After committing the ABI compatible version and MFCing it, I'd like to proceed in a forward direction in -current. this will result in some roto-tilling in the routing code. Firstly: the current code's idea of having a separate tree per protocol family, all of the same format, and pointed to by the 1 dimensional array is a bit silly. Especially when one considers that there is code that makes assumptions about every protocol having the same internal structures there. Some protocols don't WANT that sort of structure. (for example the whole idea of a netmask is foreign to appletalk). This needs to be made opaque to the external code. My suggested first change is to add routing method pointers to the 'domain' structure, along with information pointing the data. instead of having an array of pointers to uniform structures, there would be an array pointing to the 'domain' structures for each protocol address domain (protocol family), and the methods this reached would be called. The methods would have an argument that gives FIB number, but the protocol would be free to ignore it. When the ABI can be changed it raises the possibilty of the addition of a fib entry into the "struct route". Currently, the structure contains the sockaddr of the desination, and the resulting fib entry. To make this work fully, one could add a fib number so that given an address and a fib, one can find the third element, the fib entry. Interaction with the ARP layer/ LL layer would need to be revisited as well. Qing Li has been working on this already. This work was sponsored by Ironport Systems/Cisco Reviewed by: several including rwatson, bz and mlair (parts each) Obtained from: Ironport systems/Cisco
2008-05-09 23:03:00 +00:00
175 AUE_NULL STD { int setfib(int fibnum); }
176 AUE_NTP_ADJTIME STD { int ntp_adjtime(struct timex *tp); }
177 AUE_NULL UNIMPL sfork (BSD/OS 2.x)
178 AUE_NULL UNIMPL getdescriptor (BSD/OS 2.x)
179 AUE_NULL UNIMPL setdescriptor (BSD/OS 2.x)
180 AUE_NULL UNIMPL nosys
; Syscalls 181-199 are used by/reserved for BSD
181 AUE_SETGID STD { int setgid(gid_t gid); }
182 AUE_SETEGID STD { int setegid(gid_t egid); }
183 AUE_SETEUID STD { int seteuid(uid_t euid); }
184 AUE_NULL UNIMPL lfs_bmapv
185 AUE_NULL UNIMPL lfs_markv
186 AUE_NULL UNIMPL lfs_segclean
187 AUE_NULL UNIMPL lfs_segwait
188 AUE_STAT STD { int stat(char *path, struct stat *ub); }
189 AUE_FSTAT STD { int fstat(int fd, struct stat *sb); }
190 AUE_LSTAT STD { int lstat(char *path, struct stat *ub); }
191 AUE_PATHCONF STD { int pathconf(char *path, int name); }
192 AUE_FPATHCONF STD { int fpathconf(int fd, int name); }
193 AUE_NULL UNIMPL nosys
194 AUE_GETRLIMIT STD { int getrlimit(u_int which, \
struct rlimit *rlp); } getrlimit \
__getrlimit_args int
195 AUE_SETRLIMIT STD { int setrlimit(u_int which, \
struct rlimit *rlp); } setrlimit \
__setrlimit_args int
196 AUE_GETDIRENTRIES STD { int getdirentries(int fd, char *buf, \
u_int count, long *basep); }
197 AUE_MMAP COMPAT6 { caddr_t mmap(caddr_t addr, \
size_t len, int prot, int flags, int fd, \
int pad, off_t pos); }
198 AUE_NULL NOPROTO { int nosys(void); } __syscall \
__syscall_args int
199 AUE_LSEEK COMPAT6 { off_t lseek(int fd, int pad, \
off_t offset, int whence); }
200 AUE_TRUNCATE COMPAT6 { int truncate(char *path, int pad, \
off_t length); }
201 AUE_FTRUNCATE COMPAT6 { int ftruncate(int fd, int pad, \
off_t length); }
202 AUE_SYSCTL STD { int __sysctl(int *name, u_int namelen, \
void *old, size_t *oldlenp, void *new, \
size_t newlen); } __sysctl sysctl_args int
203 AUE_MLOCK STD { int mlock(const void *addr, size_t len); }
204 AUE_MUNLOCK STD { int munlock(const void *addr, size_t len); }
205 AUE_UNDELETE STD { int undelete(char *path); }
206 AUE_FUTIMES STD { int futimes(int fd, struct timeval *tptr); }
207 AUE_GETPGID STD { int getpgid(pid_t pid); }
208 AUE_NULL UNIMPL newreboot (NetBSD)
209 AUE_POLL STD { int poll(struct pollfd *fds, u_int nfds, \
int timeout); }
;
; The following are reserved for loadable syscalls
;
; NODEF adds only the entry in the syscall table (no structure or function
; prototype is created in sys/sysproto.h); NOTSTATIC marks the syscall as
; loadable. All ten slots (210-219) are identical: lkmnosys with the
; nosys_args tag and an int return type.
;
210 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
211 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
212 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
213 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
214 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
215 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
216 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
217 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
218 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
219 AUE_NULL NODEF|NOTSTATIC lkmnosys lkmnosys nosys_args int
;
; The following were introduced with NetBSD/4.4Lite-2
Change the ABI of some of the structures used by the SYSV IPC API: - The uid/cuid members of struct ipc_perm are now uid_t instead of unsigned short. - The gid/cgid members of struct ipc_perm are now gid_t instead of unsigned short. - The mode member of struct ipc_perm is now mode_t instead of unsigned short (this is merely a style bug). - The rather dubious padding fields for ABI compat with SV/I386 have been removed from struct msqid_ds and struct semid_ds. - The shm_segsz member of struct shmid_ds is now a size_t instead of an int. This removes the need for the shm_bsegsz member in struct shmid_kernel and should allow for complete support of SYSV SHM regions >= 2GB. - The shm_nattch member of struct shmid_ds is now an int instead of a short. - The shm_internal member of struct shmid_ds is now gone. The internal VM object pointer for SHM regions has been moved into struct shmid_kernel. - The existing __semctl(), msgctl(), and shmctl() system call entries are now marked COMPAT7 and new versions of those system calls which support the new ABI are now present. - The new system calls are assigned to the FBSD-1.1 version in libc. The FBSD-1.0 symbols in libc now refer to the old COMPAT7 system calls. - A simplistic framework for tagging system calls with compatibility symbol versions has been added to libc. Version tags are added to system calls by adding an appropriate __sym_compat() entry to src/lib/libc/incldue/compat.h. [1] PR: kern/16195 kern/113218 bin/129855 Reviewed by: arch@, rwatson Discussed with: kan, kib [1]
2009-06-24 21:10:52 +00:00
; __semctl is COMPAT7|NOSTD: it is included on the COMPAT7 #ifdef (FreeBSD 7
; compat) and takes a `union semun_old *' argument. semget and semop are
; plain NOSTD entries; 223 (semconfig) is an UNIMPL placeholder.
220 AUE_SEMCTL COMPAT7|NOSTD { int __semctl(int semid, int semnum, \
int cmd, union semun_old *arg); }
221 AUE_SEMGET NOSTD { int semget(key_t key, int nsems, \
int semflg); }
222 AUE_SEMOP NOSTD { int semop(int semid, struct sembuf *sops, \
size_t nsops); }
223 AUE_NULL UNIMPL semconfig
Change the ABI of some of the structures used by the SYSV IPC API: - The uid/cuid members of struct ipc_perm are now uid_t instead of unsigned short. - The gid/cgid members of struct ipc_perm are now gid_t instead of unsigned short. - The mode member of struct ipc_perm is now mode_t instead of unsigned short (this is merely a style bug). - The rather dubious padding fields for ABI compat with SV/I386 have been removed from struct msqid_ds and struct semid_ds. - The shm_segsz member of struct shmid_ds is now a size_t instead of an int. This removes the need for the shm_bsegsz member in struct shmid_kernel and should allow for complete support of SYSV SHM regions >= 2GB. - The shm_nattch member of struct shmid_ds is now an int instead of a short. - The shm_internal member of struct shmid_ds is now gone. The internal VM object pointer for SHM regions has been moved into struct shmid_kernel. - The existing __semctl(), msgctl(), and shmctl() system call entries are now marked COMPAT7 and new versions of those system calls which support the new ABI are now present. - The new system calls are assigned to the FBSD-1.1 version in libc. The FBSD-1.0 symbols in libc now refer to the old COMPAT7 system calls. - A simplistic framework for tagging system calls with compatibility symbol versions has been added to libc. Version tags are added to system calls by adding an appropriate __sym_compat() entry to src/lib/libc/include/compat.h. [1] PR: kern/16195 kern/113218 bin/129855 Reviewed by: arch@, rwatson Discussed with: kan, kib [1]
2009-06-24 21:10:52 +00:00
; FreeBSD 7 compat form of msgctl (takes struct msqid_ds_old); the entry that
; takes struct msqid_ds is syscall 511.
224 AUE_MSGCTL COMPAT7|NOSTD { int msgctl(int msqid, int cmd, \
struct msqid_ds_old *buf); }
225 AUE_MSGGET NOSTD { int msgget(key_t key, int msgflg); }
226 AUE_MSGSND NOSTD { int msgsnd(int msqid, const void *msgp, \
size_t msgsz, int msgflg); }
227 AUE_MSGRCV NOSTD { int msgrcv(int msqid, void *msgp, \
size_t msgsz, long msgtyp, int msgflg); }
228 AUE_SHMAT NOSTD { int shmat(int shmid, const void *shmaddr, \
int shmflg); }
Change the ABI of some of the structures used by the SYSV IPC API: - The uid/cuid members of struct ipc_perm are now uid_t instead of unsigned short. - The gid/cgid members of struct ipc_perm are now gid_t instead of unsigned short. - The mode member of struct ipc_perm is now mode_t instead of unsigned short (this is merely a style bug). - The rather dubious padding fields for ABI compat with SV/I386 have been removed from struct msqid_ds and struct semid_ds. - The shm_segsz member of struct shmid_ds is now a size_t instead of an int. This removes the need for the shm_bsegsz member in struct shmid_kernel and should allow for complete support of SYSV SHM regions >= 2GB. - The shm_nattch member of struct shmid_ds is now an int instead of a short. - The shm_internal member of struct shmid_ds is now gone. The internal VM object pointer for SHM regions has been moved into struct shmid_kernel. - The existing __semctl(), msgctl(), and shmctl() system call entries are now marked COMPAT7 and new versions of those system calls which support the new ABI are now present. - The new system calls are assigned to the FBSD-1.1 version in libc. The FBSD-1.0 symbols in libc now refer to the old COMPAT7 system calls. - A simplistic framework for tagging system calls with compatibility symbol versions has been added to libc. Version tags are added to system calls by adding an appropriate __sym_compat() entry to src/lib/libc/include/compat.h. [1] PR: kern/16195 kern/113218 bin/129855 Reviewed by: arch@, rwatson Discussed with: kan, kib [1]
2009-06-24 21:10:52 +00:00
; FreeBSD 7 compat form of shmctl (takes struct shmid_ds_old); the entry that
; takes struct shmid_ds is syscall 512.
229 AUE_SHMCTL COMPAT7|NOSTD { int shmctl(int shmid, int cmd, \
struct shmid_ds_old *buf); }
230 AUE_SHMDT NOSTD { int shmdt(const void *shmaddr); }
231 AUE_SHMGET NOSTD { int shmget(key_t key, size_t size, \
int shmflg); }
;
232 AUE_NULL STD { int clock_gettime(clockid_t clock_id, \
struct timespec *tp); }
233 AUE_CLOCK_SETTIME STD { int clock_settime( \
clockid_t clock_id, \
const struct timespec *tp); }
234 AUE_NULL STD { int clock_getres(clockid_t clock_id, \
struct timespec *tp); }
235 AUE_NULL STD { int ktimer_create(clockid_t clock_id, \
struct sigevent *evp, int *timerid); }
236 AUE_NULL STD { int ktimer_delete(int timerid); }
237 AUE_NULL STD { int ktimer_settime(int timerid, int flags, \
const struct itimerspec *value, \
struct itimerspec *ovalue); }
238 AUE_NULL STD { int ktimer_gettime(int timerid, struct \
itimerspec *value); }
239 AUE_NULL STD { int ktimer_getoverrun(int timerid); }
240 AUE_NULL STD { int nanosleep(const struct timespec *rqtp, \
struct timespec *rmtp); }
241 AUE_NULL STD { int ffclock_getcounter(ffcounter *ffcount); }
242 AUE_NULL STD { int ffclock_setestimate( \
struct ffclock_estimate *cest); }
243 AUE_NULL STD { int ffclock_getestimate( \
struct ffclock_estimate *cest); }
244 AUE_NULL UNIMPL nosys
245 AUE_NULL UNIMPL nosys
246 AUE_NULL UNIMPL nosys
247 AUE_NULL STD { int clock_getcpuclockid2(id_t id,\
int which, clockid_t *clock_id); }
248 AUE_NULL STD { int ntp_gettime(struct ntptimeval *ntvp); }
249 AUE_NULL UNIMPL nosys
; syscall numbers initially used in OpenBSD
250 AUE_MINHERIT STD { int minherit(void *addr, size_t len, \
int inherit); }
251 AUE_RFORK STD { int rfork(int flags); }
252 AUE_POLL STD { int openbsd_poll(struct pollfd *fds, \
u_int nfds, int timeout); }
253 AUE_ISSETUGID STD { int issetugid(void); }
254 AUE_LCHOWN STD { int lchown(char *path, int uid, int gid); }
255 AUE_NULL STD { int aio_read(struct aiocb *aiocbp); }
256 AUE_NULL STD { int aio_write(struct aiocb *aiocbp); }
257 AUE_NULL STD { int lio_listio(int mode, \
struct aiocb * const *acb_list, \
int nent, struct sigevent *sig); }
258 AUE_NULL UNIMPL nosys
259 AUE_NULL UNIMPL nosys
260 AUE_NULL UNIMPL nosys
261 AUE_NULL UNIMPL nosys
262 AUE_NULL UNIMPL nosys
263 AUE_NULL UNIMPL nosys
264 AUE_NULL UNIMPL nosys
265 AUE_NULL UNIMPL nosys
266 AUE_NULL UNIMPL nosys
267 AUE_NULL UNIMPL nosys
268 AUE_NULL UNIMPL nosys
269 AUE_NULL UNIMPL nosys
270 AUE_NULL UNIMPL nosys
271 AUE_NULL UNIMPL nosys
272 AUE_O_GETDENTS STD { int getdents(int fd, char *buf, \
size_t count); }
273 AUE_NULL UNIMPL nosys
274 AUE_LCHMOD STD { int lchmod(char *path, mode_t mode); }
275 AUE_LCHOWN NOPROTO { int lchown(char *path, uid_t uid, \
gid_t gid); } netbsd_lchown lchown_args \
int
276 AUE_LUTIMES STD { int lutimes(char *path, \
struct timeval *tptr); }
277 AUE_MSYNC NOPROTO { int msync(void *addr, size_t len, \
int flags); } netbsd_msync msync_args int
278 AUE_STAT STD { int nstat(char *path, struct nstat *ub); }
279 AUE_FSTAT STD { int nfstat(int fd, struct nstat *sb); }
280 AUE_LSTAT STD { int nlstat(char *path, struct nstat *ub); }
281 AUE_NULL UNIMPL nosys
282 AUE_NULL UNIMPL nosys
283 AUE_NULL UNIMPL nosys
284 AUE_NULL UNIMPL nosys
285 AUE_NULL UNIMPL nosys
286 AUE_NULL UNIMPL nosys
287 AUE_NULL UNIMPL nosys
288 AUE_NULL UNIMPL nosys
; 289 and 290 from NetBSD (OpenBSD: 267 and 268)
289 AUE_PREADV STD { ssize_t preadv(int fd, struct iovec *iovp, \
u_int iovcnt, off_t offset); }
290 AUE_PWRITEV STD { ssize_t pwritev(int fd, struct iovec *iovp, \
u_int iovcnt, off_t offset); }
291 AUE_NULL UNIMPL nosys
292 AUE_NULL UNIMPL nosys
293 AUE_NULL UNIMPL nosys
294 AUE_NULL UNIMPL nosys
295 AUE_NULL UNIMPL nosys
296 AUE_NULL UNIMPL nosys
; XXX 297 is 300 in NetBSD
; FreeBSD 4 compat form of fhstatfs (takes struct ostatfs *buf); the entry
; that takes struct statfs *buf is syscall 398.
297 AUE_FHSTATFS COMPAT4 { int fhstatfs( \
const struct fhandle *u_fhp, \
struct ostatfs *buf); }
298 AUE_FHOPEN STD { int fhopen(const struct fhandle *u_fhp, \
int flags); }
299 AUE_FHSTAT STD { int fhstat(const struct fhandle *u_fhp, \
struct stat *sb); }
; syscall numbers for FreeBSD
300 AUE_NULL STD { int modnext(int modid); }
301 AUE_NULL STD { int modstat(int modid, \
struct module_stat *stat); }
302 AUE_NULL STD { int modfnext(int modid); }
303 AUE_NULL STD { int modfind(const char *name); }
304 AUE_MODLOAD STD { int kldload(const char *file); }
305 AUE_MODUNLOAD STD { int kldunload(int fileid); }
306 AUE_NULL STD { int kldfind(const char *file); }
307 AUE_NULL STD { int kldnext(int fileid); }
308 AUE_NULL STD { int kldstat(int fileid, struct \
kld_file_stat* stat); }
309 AUE_NULL STD { int kldfirstmod(int fileid); }
310 AUE_GETSID STD { int getsid(pid_t pid); }
311 AUE_SETRESUID STD { int setresuid(uid_t ruid, uid_t euid, \
uid_t suid); }
312 AUE_SETRESGID STD { int setresgid(gid_t rgid, gid_t egid, \
gid_t sgid); }
313 AUE_NULL OBSOL signanosleep
314 AUE_NULL STD { ssize_t aio_return(struct aiocb *aiocbp); }
315 AUE_NULL STD { int aio_suspend( \
struct aiocb * const * aiocbp, int nent, \
const struct timespec *timeout); }
316 AUE_NULL STD { int aio_cancel(int fd, \
struct aiocb *aiocbp); }
317 AUE_NULL STD { int aio_error(struct aiocb *aiocbp); }
; 318-320: FreeBSD 6 compat forms of aio_read, aio_write and lio_listio,
; declared with struct oaiocb (and struct osigevent for lio_listio); the
; entries declared with struct aiocb / struct sigevent are syscalls 255-257.
318 AUE_NULL COMPAT6 { int aio_read(struct oaiocb *aiocbp); }
319 AUE_NULL COMPAT6 { int aio_write(struct oaiocb *aiocbp); }
320 AUE_NULL COMPAT6 { int lio_listio(int mode, \
struct oaiocb * const *acb_list, \
int nent, struct osigevent *sig); }
321 AUE_NULL STD { int yield(void); }
322 AUE_NULL OBSOL thr_sleep
323 AUE_NULL OBSOL thr_wakeup
324 AUE_MLOCKALL STD { int mlockall(int how); }
325 AUE_MUNLOCKALL STD { int munlockall(void); }
326 AUE_GETCWD STD { int __getcwd(char *buf, u_int buflen); }
327 AUE_NULL STD { int sched_setparam (pid_t pid, \
const struct sched_param *param); }
328 AUE_NULL STD { int sched_getparam (pid_t pid, struct \
sched_param *param); }
329 AUE_NULL STD { int sched_setscheduler (pid_t pid, int \
policy, const struct sched_param \
*param); }
330 AUE_NULL STD { int sched_getscheduler (pid_t pid); }
331 AUE_NULL STD { int sched_yield (void); }
332 AUE_NULL STD { int sched_get_priority_max (int policy); }
333 AUE_NULL STD { int sched_get_priority_min (int policy); }
334 AUE_NULL STD { int sched_rr_get_interval (pid_t pid, \
struct timespec *interval); }
335 AUE_NULL STD { int utrace(const void *addr, size_t len); }
; FreeBSD 4 compat sendfile; the STD sendfile entry is syscall 393, whose
; pseudo-prototype is written with the same argument list.
336 AUE_SENDFILE COMPAT4 { int sendfile(int fd, int s, \
off_t offset, size_t nbytes, \
struct sf_hdtr *hdtr, off_t *sbytes, \
int flags); }
337 AUE_NULL STD { int kldsym(int fileid, int cmd, \
void *data); }
338 AUE_JAIL STD { int jail(struct jail *jail); }
339 AUE_NULL NOSTD|NOTSTATIC { int nnpfs_syscall(int operation, \
char *a_pathP, int a_opcode, \
void *a_paramsP, int a_followSymlinks); }
340 AUE_SIGPROCMASK STD { int sigprocmask(int how, \
const sigset_t *set, sigset_t *oset); }
341 AUE_SIGSUSPEND STD { int sigsuspend(const sigset_t *sigmask); }
; FreeBSD 4 compat sigaction; the STD sigaction entry is syscall 416, whose
; pseudo-prototype is written with the same argument list.
342 AUE_SIGACTION COMPAT4 { int sigaction(int sig, const \
struct sigaction *act, \
struct sigaction *oact); }
343 AUE_SIGPENDING STD { int sigpending(sigset_t *set); }
; FreeBSD 4 compat sigreturn (takes struct ucontext4); the entry that takes
; struct __ucontext is syscall 417.
344 AUE_SIGRETURN COMPAT4 { int sigreturn( \
const struct ucontext4 *sigcntxp); }
345 AUE_SIGWAIT STD { int sigtimedwait(const sigset_t *set, \
siginfo_t *info, \
const struct timespec *timeout); }
346 AUE_NULL STD { int sigwaitinfo(const sigset_t *set, \
siginfo_t *info); }
347 AUE_NULL STD { int __acl_get_file(const char *path, \
acl_type_t type, struct acl *aclp); }
348 AUE_NULL STD { int __acl_set_file(const char *path, \
acl_type_t type, struct acl *aclp); }
349 AUE_NULL STD { int __acl_get_fd(int filedes, \
acl_type_t type, struct acl *aclp); }
350 AUE_NULL STD { int __acl_set_fd(int filedes, \
acl_type_t type, struct acl *aclp); }
351 AUE_NULL STD { int __acl_delete_file(const char *path, \
acl_type_t type); }
352 AUE_NULL STD { int __acl_delete_fd(int filedes, \
acl_type_t type); }
353 AUE_NULL STD { int __acl_aclcheck_file(const char *path, \
acl_type_t type, struct acl *aclp); }
354 AUE_NULL STD { int __acl_aclcheck_fd(int filedes, \
acl_type_t type, struct acl *aclp); }
355 AUE_EXTATTRCTL STD { int extattrctl(const char *path, int cmd, \
const char *filename, int attrnamespace, \
const char *attrname); }
356 AUE_EXTATTR_SET_FILE STD { ssize_t extattr_set_file( \
const char *path, int attrnamespace, \
const char *attrname, void *data, \
size_t nbytes); }
357 AUE_EXTATTR_GET_FILE STD { ssize_t extattr_get_file( \
const char *path, int attrnamespace, \
const char *attrname, void *data, \
size_t nbytes); }
358 AUE_EXTATTR_DELETE_FILE STD { int extattr_delete_file(const char *path, \
int attrnamespace, \
const char *attrname); }
359 AUE_NULL STD { ssize_t aio_waitcomplete( \
struct aiocb **aiocbp, \
struct timespec *timeout); }
360 AUE_GETRESUID STD { int getresuid(uid_t *ruid, uid_t *euid, \
uid_t *suid); }
361 AUE_GETRESGID STD { int getresgid(gid_t *rgid, gid_t *egid, \
gid_t *sgid); }
362 AUE_KQUEUE STD { int kqueue(void); }
363 AUE_NULL STD { int kevent(int fd, \
struct kevent *changelist, int nchanges, \
struct kevent *eventlist, int nevents, \
const struct timespec *timeout); }
364 AUE_NULL UNIMPL __cap_get_proc
365 AUE_NULL UNIMPL __cap_set_proc
366 AUE_NULL UNIMPL __cap_get_fd
367 AUE_NULL UNIMPL __cap_get_file
368 AUE_NULL UNIMPL __cap_set_fd
369 AUE_NULL UNIMPL __cap_set_file
370 AUE_NULL UNIMPL nosys
371 AUE_EXTATTR_SET_FD STD { ssize_t extattr_set_fd(int fd, \
int attrnamespace, const char *attrname, \
void *data, size_t nbytes); }
372 AUE_EXTATTR_GET_FD STD { ssize_t extattr_get_fd(int fd, \
int attrnamespace, const char *attrname, \
void *data, size_t nbytes); }
373 AUE_EXTATTR_DELETE_FD STD { int extattr_delete_fd(int fd, \
int attrnamespace, \
const char *attrname); }
374 AUE_NULL STD { int __setugid(int flag); }
375 AUE_NULL UNIMPL nfsclnt
376 AUE_EACCESS STD { int eaccess(char *path, int amode); }
377 AUE_NULL NOSTD|NOTSTATIC { int afs3_syscall(long syscall, \
long parm1, long parm2, long parm3, \
long parm4, long parm5, long parm6); }
378 AUE_NMOUNT STD { int nmount(struct iovec *iovp, \
unsigned int iovcnt, int flags); }
379 AUE_NULL UNIMPL kse_exit
380 AUE_NULL UNIMPL kse_wakeup
381 AUE_NULL UNIMPL kse_create
382 AUE_NULL UNIMPL kse_thr_interrupt
383 AUE_NULL UNIMPL kse_release
384 AUE_NULL STD { int __mac_get_proc(struct mac *mac_p); }
385 AUE_NULL STD { int __mac_set_proc(struct mac *mac_p); }
386 AUE_NULL STD { int __mac_get_fd(int fd, \
struct mac *mac_p); }
387 AUE_NULL STD { int __mac_get_file(const char *path_p, \
struct mac *mac_p); }
388 AUE_NULL STD { int __mac_set_fd(int fd, \
struct mac *mac_p); }
389 AUE_NULL STD { int __mac_set_file(const char *path_p, \
struct mac *mac_p); }
390 AUE_NULL STD { int kenv(int what, const char *name, \
char *value, int len); }
391 AUE_LCHFLAGS STD { int lchflags(const char *path, \
u_long flags); }
392 AUE_NULL STD { int uuidgen(struct uuid *store, \
int count); }
393 AUE_SENDFILE STD { int sendfile(int fd, int s, off_t offset, \
size_t nbytes, struct sf_hdtr *hdtr, \
off_t *sbytes, int flags); }
394 AUE_NULL STD { int mac_syscall(const char *policy, \
int call, void *arg); }
395 AUE_GETFSSTAT STD { int getfsstat(struct statfs *buf, \
long bufsize, int flags); }
396 AUE_STATFS STD { int statfs(char *path, \
struct statfs *buf); }
397 AUE_FSTATFS STD { int fstatfs(int fd, struct statfs *buf); }
398 AUE_FHSTATFS STD { int fhstatfs(const struct fhandle *u_fhp, \
struct statfs *buf); }
399 AUE_NULL UNIMPL nosys
400 AUE_NULL NOSTD { int ksem_close(semid_t id); }
401 AUE_NULL NOSTD { int ksem_post(semid_t id); }
402 AUE_NULL NOSTD { int ksem_wait(semid_t id); }
403 AUE_NULL NOSTD { int ksem_trywait(semid_t id); }
404 AUE_NULL NOSTD { int ksem_init(semid_t *idp, \
unsigned int value); }
405 AUE_NULL NOSTD { int ksem_open(semid_t *idp, \
const char *name, int oflag, \
mode_t mode, unsigned int value); }
406 AUE_NULL NOSTD { int ksem_unlink(const char *name); }
407 AUE_NULL NOSTD { int ksem_getvalue(semid_t id, int *val); }
408 AUE_NULL NOSTD { int ksem_destroy(semid_t id); }
409 AUE_NULL STD { int __mac_get_pid(pid_t pid, \
struct mac *mac_p); }
410 AUE_NULL STD { int __mac_get_link(const char *path_p, \
struct mac *mac_p); }
411 AUE_NULL STD { int __mac_set_link(const char *path_p, \
struct mac *mac_p); }
412 AUE_EXTATTR_SET_LINK STD { ssize_t extattr_set_link( \
const char *path, int attrnamespace, \
const char *attrname, void *data, \
size_t nbytes); }
413 AUE_EXTATTR_GET_LINK STD { ssize_t extattr_get_link( \
const char *path, int attrnamespace, \
const char *attrname, void *data, \
size_t nbytes); }
414 AUE_EXTATTR_DELETE_LINK STD { int extattr_delete_link( \
const char *path, int attrnamespace, \
const char *attrname); }
415 AUE_NULL STD { int __mac_execve(char *fname, char **argv, \
char **envv, struct mac *mac_p); }
416 AUE_SIGACTION STD { int sigaction(int sig, \
const struct sigaction *act, \
struct sigaction *oact); }
417 AUE_SIGRETURN STD { int sigreturn( \
const struct __ucontext *sigcntxp); }
418 AUE_NULL UNIMPL __xstat
419 AUE_NULL UNIMPL __xfstat
420 AUE_NULL UNIMPL __xlstat
421 AUE_NULL STD { int getcontext(struct __ucontext *ucp); }
422 AUE_NULL STD { int setcontext( \
const struct __ucontext *ucp); }
423 AUE_NULL STD { int swapcontext(struct __ucontext *oucp, \
const struct __ucontext *ucp); }
424 AUE_SWAPOFF STD { int swapoff(const char *name); }
425 AUE_NULL STD { int __acl_get_link(const char *path, \
acl_type_t type, struct acl *aclp); }
426 AUE_NULL STD { int __acl_set_link(const char *path, \
acl_type_t type, struct acl *aclp); }
427 AUE_NULL STD { int __acl_delete_link(const char *path, \
acl_type_t type); }
428 AUE_NULL STD { int __acl_aclcheck_link(const char *path, \
acl_type_t type, struct acl *aclp); }
429 AUE_SIGWAIT STD { int sigwait(const sigset_t *set, \
int *sig); }
430 AUE_NULL STD { int thr_create(ucontext_t *ctx, long *id, \
int flags); }
431 AUE_NULL STD { void thr_exit(long *state); }
432 AUE_NULL STD { int thr_self(long *id); }
433 AUE_NULL STD { int thr_kill(long id, int sig); }
434 AUE_NULL UNIMPL nosys
435 AUE_NULL UNIMPL nosys
436 AUE_NULL STD { int jail_attach(int jid); }
437 AUE_EXTATTR_LIST_FD STD { ssize_t extattr_list_fd(int fd, \
int attrnamespace, void *data, \
size_t nbytes); }
438 AUE_EXTATTR_LIST_FILE STD { ssize_t extattr_list_file( \
const char *path, int attrnamespace, \
void *data, size_t nbytes); }
439 AUE_EXTATTR_LIST_LINK STD { ssize_t extattr_list_link( \
const char *path, int attrnamespace, \
void *data, size_t nbytes); }
440 AUE_NULL UNIMPL kse_switchin
441 AUE_NULL NOSTD { int ksem_timedwait(semid_t id, \
const struct timespec *abstime); }
442 AUE_NULL STD { int thr_suspend( \
const struct timespec *timeout); }
443 AUE_NULL STD { int thr_wake(long id); }
444 AUE_MODUNLOAD STD { int kldunloadf(int fileid, int flags); }
445 AUE_AUDIT STD { int audit(const void *record, \
u_int length); }
446 AUE_AUDITON STD { int auditon(int cmd, void *data, \
u_int length); }
447 AUE_GETAUID STD { int getauid(uid_t *auid); }
448 AUE_SETAUID STD { int setauid(uid_t *auid); }
449 AUE_GETAUDIT STD { int getaudit(struct auditinfo *auditinfo); }
450 AUE_SETAUDIT STD { int setaudit(struct auditinfo *auditinfo); }
451 AUE_GETAUDIT_ADDR STD { int getaudit_addr( \
struct auditinfo_addr *auditinfo_addr, \
u_int length); }
452 AUE_SETAUDIT_ADDR STD { int setaudit_addr( \
struct auditinfo_addr *auditinfo_addr, \
u_int length); }
453 AUE_AUDITCTL STD { int auditctl(char *path); }
This is the initial version of POSIX priority mutex support, a new userland mutex structure is added as follows: struct umutex { __lwpid_t m_owner; uint32_t m_flags; uint32_t m_ceilings[2]; uint32_t m_spare[4]; }; The m_owner represents owner thread, it is a thread id, in non-contested case, userland can simply use atomic_cmpset_int to lock the mutex, if the mutex is contested, high order bit will be set, and userland should do locking and unlocking via kernel syscall. Flag UMUTEX_PRIO_INHERIT represents pthread's PTHREAD_PRIO_INHERIT mutex, which when contention happens, kernel should do priority propagating. Flag UMUTEX_PRIO_PROTECT indicates it is pthread's PTHREAD_PRIO_PROTECT mutex, userland should initialize m_owner to contested state UMUTEX_CONTESTED, then atomic_cmpset_int will fail and kernel syscall should be invoked to do locking, this is because for such a mutex, kernel should always boost the thread's priority before it can lock the mutex, m_ceilings is used by PTHREAD_PRIO_PROTECT mutex, the first element is used to boost thread's priority when it locked the mutex, second element is used when the mutex is unlocked, the PTHREAD_PRIO_PROTECT mutex's link list is kept in userland, the m_ceiling[1] is managed by thread library so kernel needn't allocate memory to keep the link list, when such a mutex is unlocked, kernel reset m_owner to UMUTEX_CONTESTED. Flag USYNC_PROCESS_SHARED indicates if the synchronization object is process shared, if the flag is not set, it saves a vm_map_lookup() call. The umtx chain is still used as a sleep queue, when a thread is blocked on PTHREAD_PRIO_INHERIT mutex, a umtx_pi is allocated to support priority propagating, it is dynamically allocated and reference count is used, it is not optimized but works well in my tests, while the umtx chain has its own locking protocol, the priority propagating protocol are all protected by sched_lock because priority propagating function is called with sched_lock held from scheduler. 
No visible performance degradation was found with these changes. Some parameter names in the _umtx_op syscall are renamed.
2006-08-28 04:24:51 +00:00
454 AUE_NULL STD { int _umtx_op(void *obj, int op, \
u_long val, void *uaddr1, void *uaddr2); }
455 AUE_NULL STD { int thr_new(struct thr_param *param, \
int param_size); }
456 AUE_NULL STD { int sigqueue(pid_t pid, int signum, void *value); }
457 AUE_NULL NOSTD { int kmq_open(const char *path, int flags, \
mode_t mode, const struct mq_attr *attr); }
458 AUE_NULL NOSTD { int kmq_setattr(int mqd, \
const struct mq_attr *attr, \
struct mq_attr *oattr); }
459 AUE_NULL NOSTD { int kmq_timedreceive(int mqd, \
char *msg_ptr, size_t msg_len, \
unsigned *msg_prio, \
const struct timespec *abs_timeout); }
460 AUE_NULL NOSTD { int kmq_timedsend(int mqd, \
const char *msg_ptr, size_t msg_len,\
unsigned msg_prio, \
const struct timespec *abs_timeout);}
461 AUE_NULL NOSTD { int kmq_notify(int mqd, \
const struct sigevent *sigev); }
462 AUE_NULL NOSTD { int kmq_unlink(const char *path); }
463 AUE_NULL STD { int abort2(const char *why, int nargs, void **args); }
464 AUE_NULL STD { int thr_set_name(long id, const char *name); }
465 AUE_NULL STD { int aio_fsync(int op, struct aiocb *aiocbp); }
466 AUE_RTPRIO STD { int rtprio_thread(int function, \
lwpid_t lwpid, struct rtprio *rtp); }
467 AUE_NULL UNIMPL nosys
468 AUE_NULL UNIMPL nosys
469 AUE_NULL UNIMPL __getpath_fromfd
470 AUE_NULL UNIMPL __getpath_fromaddr
471 AUE_NULL NOSTD { int sctp_peeloff(int sd, uint32_t name); }
472 AUE_NULL NOSTD { int sctp_generic_sendmsg(int sd, caddr_t msg, int mlen, \
caddr_t to, __socklen_t tolen, \
struct sctp_sndrcvinfo *sinfo, int flags); }
473 AUE_NULL NOSTD { int sctp_generic_sendmsg_iov(int sd, struct iovec *iov, int iovlen, \
caddr_t to, __socklen_t tolen, \
struct sctp_sndrcvinfo *sinfo, int flags); }
474 AUE_NULL NOSTD { int sctp_generic_recvmsg(int sd, struct iovec *iov, int iovlen, \
struct sockaddr * from, __socklen_t *fromlenaddr, \
struct sctp_sndrcvinfo *sinfo, int *msg_flags); }
475 AUE_PREAD STD { ssize_t pread(int fd, void *buf, \
size_t nbyte, off_t offset); }
476 AUE_PWRITE STD { ssize_t pwrite(int fd, const void *buf, \
size_t nbyte, off_t offset); }
477 AUE_MMAP STD { caddr_t mmap(caddr_t addr, size_t len, \
int prot, int flags, int fd, off_t pos); }
478 AUE_LSEEK STD { off_t lseek(int fd, off_t offset, \
int whence); }
479 AUE_TRUNCATE STD { int truncate(char *path, off_t length); }
480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); }
481 AUE_KILL STD { int thr_kill2(pid_t pid, long id, int sig); }
482 AUE_SHMOPEN STD { int shm_open(const char *path, int flags, \
Add a new file descriptor type for IPC shared memory objects and use it to implement shm_open(2) and shm_unlink(2) in the kernel: - Each shared memory file descriptor is associated with a swap-backed vm object which provides the backing store. Each descriptor starts off with a size of zero, but the size can be altered via ftruncate(2). The shared memory file descriptors also support fstat(2). read(2), write(2), ioctl(2), select(2), poll(2), and kevent(2) are not supported on shared memory file descriptors. - shm_open(2) and shm_unlink(2) are now implemented as system calls that manage shared memory file descriptors. The virtual namespace that maps pathnames to shared memory file descriptors is implemented as a hash table where the hash key is generated via the 32-bit Fowler/Noll/Vo hash of the pathname. - As an extension, the constant 'SHM_ANON' may be specified in place of the path argument to shm_open(2). In this case, an unnamed shared memory file descriptor will be created similar to the IPC_PRIVATE key for shmget(2). Note that the shared memory object can still be shared among processes by sharing the file descriptor via fork(2) or sendmsg(2), but it is unnamed. This effectively serves to implement the getmemfd() idea bandied about the lists several times over the years. - The backing store for shared memory file descriptors are garbage collected when they are not referenced by any open file descriptors or the shm_open(2) virtual namespace. Submitted by: dillon, peter (previous versions) Submitted by: rwatson (I based this on his version) Reviewed by: alc (suggested converting getmemfd() to shm_open())
2008-01-08 21:58:16 +00:00
mode_t mode); }
483 AUE_SHMUNLINK STD { int shm_unlink(const char *path); }
484 AUE_NULL STD { int cpuset(cpusetid_t *setid); }
485 AUE_NULL STD { int cpuset_setid(cpuwhich_t which, id_t id, \
cpusetid_t setid); }
486 AUE_NULL STD { int cpuset_getid(cpulevel_t level, \
cpuwhich_t which, id_t id, \
cpusetid_t *setid); }
487 AUE_NULL STD { int cpuset_getaffinity(cpulevel_t level, \
cpuwhich_t which, id_t id, size_t cpusetsize, \
cpuset_t *mask); }
488 AUE_NULL STD { int cpuset_setaffinity(cpulevel_t level, \
cpuwhich_t which, id_t id, size_t cpusetsize, \
const cpuset_t *mask); }
489 AUE_FACCESSAT STD { int faccessat(int fd, char *path, int amode, \
int flag); }
490 AUE_FCHMODAT STD { int fchmodat(int fd, char *path, mode_t mode, \
int flag); }
491 AUE_FCHOWNAT STD { int fchownat(int fd, char *path, uid_t uid, \
gid_t gid, int flag); }
492 AUE_FEXECVE STD { int fexecve(int fd, char **argv, \
char **envv); }
493 AUE_FSTATAT STD { int fstatat(int fd, char *path, \
struct stat *buf, int flag); }
494 AUE_FUTIMESAT STD { int futimesat(int fd, char *path, \
struct timeval *times); }
495 AUE_LINKAT STD { int linkat(int fd1, char *path1, int fd2, \
char *path2, int flag); }
496 AUE_MKDIRAT STD { int mkdirat(int fd, char *path, mode_t mode); }
497 AUE_MKFIFOAT STD { int mkfifoat(int fd, char *path, mode_t mode); }
498 AUE_MKNODAT STD { int mknodat(int fd, char *path, mode_t mode, \
dev_t dev); }
; XXX: see the comment for open
499 AUE_OPENAT_RWTC STD { int openat(int fd, char *path, int flag, \
mode_t mode); }
500 AUE_READLINKAT STD { int readlinkat(int fd, char *path, char *buf, \
size_t bufsize); }
501 AUE_RENAMEAT STD { int renameat(int oldfd, char *old, int newfd, \
char *new); }
502 AUE_SYMLINKAT STD { int symlinkat(char *path1, int fd, \
char *path2); }
503 AUE_UNLINKAT STD { int unlinkat(int fd, char *path, int flag); }
504 AUE_POSIX_OPENPT STD { int posix_openpt(int flags); }
Implement support for RPCSEC_GSS authentication to both the NFS client and server. This replaces the RPC implementation of the NFS client and server with the newer RPC implementation originally developed (actually ported from the userland sunrpc code) to support the NFS Lock Manager. I have tested this code extensively and I believe it is stable and that performance is at least equal to the legacy RPC implementation. The NFS code currently contains support for both the new RPC implementation and the older legacy implementation inherited from the original NFS codebase. The default is to use the new implementation - add the NFS_LEGACYRPC option to fall back to the old code. When I merge this support back to RELENG_7, I will probably change this so that users have to 'opt in' to get the new code. To use RPCSEC_GSS on either client or server, you must build a kernel which includes the KGSSAPI option and the crypto device. On the userland side, you must build at least a new libc, mountd, mount_nfs and gssd. You must install new versions of /etc/rc.d/gssd and /etc/rc.d/nfsd and add 'gssd_enable=YES' to /etc/rc.conf. As long as gssd is running, you should be able to mount an NFS filesystem from a server that requires RPCSEC_GSS authentication. The mount itself can happen without any kerberos credentials but all access to the filesystem will be denied unless the accessing user has a valid ticket file in the standard place (/tmp/krb5cc_<uid>). There is currently no support for situations where the ticket file is in a different place, such as when the user logged in via SSH and has delegated credentials from that login. This restriction is also present in Solaris and Linux. In theory, we could improve this in future, possibly using Brooks Davis' implementation of variant symlinks. Supporting RPCSEC_GSS on a server is nearly as simple. You must create service creds for the server in the form 'nfs/<fqdn>@<REALM>' and install them in /etc/krb5.keytab. 
The standard heimdal utility ktutil makes this fairly easy. After the service creds have been created, you can add a '-sec=krb5' option to /etc/exports and restart both mountd and nfsd. The only other difference an administrator should notice is that nfsd doesn't fork to create service threads any more. In normal operation, there will be two nfsd processes, one in userland waiting for TCP connections and one in the kernel handling requests. The latter process will create as many kthreads as required - these should be visible via 'top -H'. The code has some support for varying the number of service threads according to load but initially at least, nfsd uses a fixed number of threads according to the value supplied to its '-n' option. Sponsored by: Isilon Systems MFC after: 1 month
2008-11-03 10:38:00 +00:00
; 505 is initialised by the kgssapi code, if present.
505 AUE_NULL NOSTD { int gssd_syscall(char *path); }
506 AUE_NULL STD { int jail_get(struct iovec *iovp, \
unsigned int iovcnt, int flags); }
507 AUE_NULL STD { int jail_set(struct iovec *iovp, \
unsigned int iovcnt, int flags); }
508 AUE_NULL STD { int jail_remove(int jid); }
509 AUE_CLOSEFROM STD { int closefrom(int lowfd); }
Change the ABI of some of the structures used by the SYSV IPC API: - The uid/cuid members of struct ipc_perm are now uid_t instead of unsigned short. - The gid/cgid members of struct ipc_perm are now gid_t instead of unsigned short. - The mode member of struct ipc_perm is now mode_t instead of unsigned short (this is merely a style bug). - The rather dubious padding fields for ABI compat with SV/I386 have been removed from struct msqid_ds and struct semid_ds. - The shm_segsz member of struct shmid_ds is now a size_t instead of an int. This removes the need for the shm_bsegsz member in struct shmid_kernel and should allow for complete support of SYSV SHM regions >= 2GB. - The shm_nattch member of struct shmid_ds is now an int instead of a short. - The shm_internal member of struct shmid_ds is now gone. The internal VM object pointer for SHM regions has been moved into struct shmid_kernel. - The existing __semctl(), msgctl(), and shmctl() system call entries are now marked COMPAT7 and new versions of those system calls which support the new ABI are now present. - The new system calls are assigned to the FBSD-1.1 version in libc. The FBSD-1.0 symbols in libc now refer to the old COMPAT7 system calls. - A simplistic framework for tagging system calls with compatibility symbol versions has been added to libc. Version tags are added to system calls by adding an appropriate __sym_compat() entry to src/lib/libc/include/compat.h. [1] PR: kern/16195 kern/113218 bin/129855 Reviewed by: arch@, rwatson Discussed with: kan, kib [1]
2009-06-24 21:10:52 +00:00
; 510-512: __semctl, msgctl and shmctl taking union semun, struct msqid_ds
; and struct shmid_ds; the COMPAT7 entries taking the *_old types are
; syscalls 220, 224 and 229.
510 AUE_SEMCTL NOSTD { int __semctl(int semid, int semnum, \
int cmd, union semun *arg); }
511 AUE_MSGCTL NOSTD { int msgctl(int msqid, int cmd, \
struct msqid_ds *buf); }
512 AUE_SHMCTL NOSTD { int shmctl(int shmid, int cmd, \
struct shmid_ds *buf); }
; 513: lpathconf takes a path and an int, name; audited as AUE_LPATHCONF.
513 AUE_LPATHCONF STD { int lpathconf(char *path, int name); }
Change the cap_rights_t type from uint64_t to a structure that we can extend in the future in a backward compatible (API and ABI) way. The cap_rights_t represents capability rights. We used to use one bit to represent one right, but we are running out of spare bits. Currently the new structure provides place for 114 rights (so 50 more than the previous cap_rights_t), but it is possible to grow the structure to hold at least 285 rights, although we can make it even larger if 285 rights won't be enough. The structure definition looks like this: struct cap_rights { uint64_t cr_rights[CAP_RIGHTS_VERSION + 2]; }; The initial CAP_RIGHTS_VERSION is 0. The top two bits in the first element of the cr_rights[] array contain total number of elements in the array - 2. This means if those two bits are equal to 0, we have 2 array elements. The top two bits in all remaining array elements should be 0. The next five bits in all array elements contain array index. Only one bit is used and bit position in this five-bits range defines array index. This means there can be at most five array elements in the future. To define new right the CAPRIGHT() macro must be used. The macro takes two arguments - an array index and a bit to set, eg. 
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL) We still support aliases that combine few rights, but the rights have to belong to the same array element, eg: #define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL) #define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL) #define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP) There is new API to manage the new cap_rights_t structure: cap_rights_t *cap_rights_init(cap_rights_t *rights, ...); void cap_rights_set(cap_rights_t *rights, ...); void cap_rights_clear(cap_rights_t *rights, ...); bool cap_rights_is_set(const cap_rights_t *rights, ...); bool cap_rights_is_valid(const cap_rights_t *rights); void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src); void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src); bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little); Capability rights to the cap_rights_init(), cap_rights_set(), cap_rights_clear() and cap_rights_is_set() functions are provided by separating them with commas, eg: cap_rights_t rights; cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT); There is no need to terminate the list of rights, as those functions are actually macros that take care of the termination, eg: #define cap_rights_set(rights, ...) \ __cap_rights_set((rights), __VA_ARGS__, 0ULL) void __cap_rights_set(cap_rights_t *rights, ...); Thanks to using one bit as an array index we can assert in those functions that there are no two rights belonging to different array elements provided together. For example this is illegal and will be detected, because CAP_LOOKUP belongs to element 0 and CAP_PDKILL to element 1: cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL); Providing several rights that belongs to the same array's element this way is correct, but is not advised. It should only be used for aliases definition. This commit also breaks compatibility with some existing Capsicum system calls, but I see no other way to do that. 
This should be fine as Capsicum is still experimental and this change is not going to 9.x. Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
; 514-517: cap_* calls.  514 is OBSOL: only the name cap_new is recorded and
; nothing is included in the system for it.  __cap_rights_get takes a
; version argument ahead of the descriptor and a cap_rights_t pointer;
; cap_enter takes no arguments; cap_getmode takes a u_int pointer, modep.
514 AUE_NULL OBSOL cap_new
515 AUE_CAP_RIGHTS_GET STD { int __cap_rights_get(int version, \
int fd, cap_rights_t *rightsp); }
516 AUE_CAP_ENTER STD { int cap_enter(void); }
517 AUE_CAP_GETMODE STD { int cap_getmode(u_int *modep); }
; 518-521: pd* calls.  pdfork takes an int pointer, fdp; pdkill and pdgetpid
; take an int fd as their first argument.  521 is UNIMPL: a placeholder
; only, naming pdwait4.
518 AUE_PDFORK STD { int pdfork(int *fdp, int flags); }
519 AUE_PDKILL STD { int pdkill(int fd, int signum); }
520 AUE_PDGETPID STD { int pdgetpid(int fd, pid_t *pidp); }
521 AUE_PDWAIT UNIMPL pdwait4
; 522: pselect is audited with AUE_SELECT.  After the descriptor count and
; the three fd_set pointers it takes a const timespec pointer and a const
; sigset_t pointer.
522 AUE_SELECT STD { int pselect(int nd, fd_set *in, \
fd_set *ou, fd_set *ex, \
const struct timespec *ts, \
const sigset_t *sm); }
; 523-524: getloginclass takes a writable buffer plus a size_t namelen;
; setloginclass takes a const string.  Neither has an audit event.
523 AUE_NULL STD { int getloginclass(char *namebuf, \
size_t namelen); }
524 AUE_NULL STD { int setloginclass(const char *namebuf); }
; 525-529: rctl_* calls.  All five share one argument list: a const input
; buffer with a size_t length, and a writable output buffer with a size_t
; length.  None of them has an audit event (AUE_NULL).
525 AUE_NULL STD { int rctl_get_racct(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
526 AUE_NULL STD { int rctl_get_rules(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
527 AUE_NULL STD { int rctl_get_limits(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
528 AUE_NULL STD { int rctl_add_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
; 530: posix_fallocate takes a descriptor and two off_t values, offset and
; len.  No audit event is associated with it (AUE_NULL).
530 AUE_NULL STD { int posix_fallocate(int fd, \
off_t offset, off_t len); }
Add the posix_fadvise(2) system call. It is somewhat similar to madvise(2) except that it operates on a file descriptor instead of a memory region. It is currently only supported on regular files. Just as with madvise(2), the advice given to posix_fadvise(2) can be divided into two types. The first type provide hints about data access patterns and are used in the file read and write routines to modify the I/O flags passed down to VOP_READ() and VOP_WRITE(). These modes are thus filesystem independent. Note that to ease implementation (and since this API is only advisory anyway), only a single non-normal range is allowed per file descriptor. The second type of hints are used to hint to the OS that data will or will not be used. These hints are implemented via a new VOP_ADVISE(). A default implementation is provided which does nothing for the WILLNEED request and attempts to move any clean pages to the cache page queue for the DONTNEED request. This latter case required two other changes. First, a new V_CLEANONLY flag was added to vinvalbuf(). This requests vinvalbuf() to only flush clean buffers for the vnode from the buffer cache and to not remove any backing pages from the vnode. This is used to ensure clean pages are not wired into the buffer cache before attempting to move them to the cache page queue. The second change adds a new vm_object_page_cache() method. This method is somewhat similar to vm_object_page_remove() except that instead of freeing each page in the specified range, it attempts to move clean pages to the cache queue if possible. To preserve the ABI of struct file, the f_cdevpriv pointer is now reused in a union to point to the currently active advice region if one is present for regular files. Reviewed by: jilles, kib, arch@ Approved by: re (kib) MFC after: 1 month
2011-11-04 04:02:50 +00:00
; 531: posix_fadvise takes a descriptor, an off_t offset and len, and an int
; advice.  No audit event is associated with it (AUE_NULL).
531 AUE_NULL STD { int posix_fadvise(int fd, off_t offset, \
off_t len, int advice); }
; 532: wait6 identifies its target with an (idtype, id) pair and, besides
; the int options, takes three pointers: status, wrusage and info.
532 AUE_WAIT6 STD { int wait6(idtype_t idtype, id_t id, \
int *status, int options, \
struct __wrusage *wrusage, \
siginfo_t *info); }
Merge Capsicum overhaul: - Capability is no longer separate descriptor type. Now every descriptor has set of its own capability rights. - The cap_new(2) system call is left, but it is no longer documented and should not be used in new code. - The new syscall cap_rights_limit(2) should be used instead of cap_new(2), which limits capability rights of the given descriptor without creating a new one. - The cap_getrights(2) syscall is renamed to cap_rights_get(2). - If CAP_IOCTL capability right is present we can further reduce allowed ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed ioctls can be retrieved with cap_ioctls_get(2) syscall. - If CAP_FCNTL capability right is present we can further reduce fcntls that can be used with the new cap_fcntls_limit(2) syscall and retrieve them with cap_fcntls_get(2). - To support ioctl and fcntl white-listing the filedesc structure was heavily modified. - The audit subsystem, kdump and procstat tools were updated to recognize new syscalls. - Capability rights were revised and even though I tried hard to provide backward API and ABI compatibility there are some incompatible changes that are described in detail below: CAP_CREATE old behaviour: - Allow for openat(2)+O_CREAT. - Allow for linkat(2). - Allow for symlinkat(2). CAP_CREATE new behaviour: - Allow for openat(2)+O_CREAT. Added CAP_LINKAT: - Allow for linkat(2). ABI: Reuses CAP_RMDIR bit. - Allow to be target for renameat(2). Added CAP_SYMLINKAT: - Allow for symlinkat(2). Removed CAP_DELETE. Old behaviour: - Allow for unlinkat(2) when removing non-directory object. - Allow to be source for renameat(2). Removed CAP_RMDIR. Old behaviour: - Allow for unlinkat(2) when removing directory. Added CAP_RENAMEAT: - Required for source directory for the renameat(2) syscall. Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR): - Allow for unlinkat(2) on any object. - Required if target of renameat(2) exists and will be removed by this call. 
Removed CAP_MAPEXEC. CAP_MMAP old behaviour: - Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and PROT_WRITE. CAP_MMAP new behaviour: - Allow for mmap(2)+PROT_NONE. Added CAP_MMAP_R: - Allow for mmap(PROT_READ). Added CAP_MMAP_W: - Allow for mmap(PROT_WRITE). Added CAP_MMAP_X: - Allow for mmap(PROT_EXEC). Added CAP_MMAP_RW: - Allow for mmap(PROT_READ | PROT_WRITE). Added CAP_MMAP_RX: - Allow for mmap(PROT_READ | PROT_EXEC). Added CAP_MMAP_WX: - Allow for mmap(PROT_WRITE | PROT_EXEC). Added CAP_MMAP_RWX: - Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). Renamed CAP_MKDIR to CAP_MKDIRAT. Renamed CAP_MKFIFO to CAP_MKFIFOAT. Renamed CAP_MKNODE to CAP_MKNODEAT. CAP_READ old behaviour: - Allow pread(2). - Disallow read(2), readv(2) (if there is no CAP_SEEK). CAP_READ new behaviour: - Allow read(2), readv(2). - Disallow pread(2) (CAP_SEEK was also required). CAP_WRITE old behaviour: - Allow pwrite(2). - Disallow write(2), writev(2) (if there is no CAP_SEEK). CAP_WRITE new behaviour: - Allow write(2), writev(2). - Disallow pwrite(2) (CAP_SEEK was also required). 
Added convinient defines: #define CAP_PREAD (CAP_SEEK | CAP_READ) #define CAP_PWRITE (CAP_SEEK | CAP_WRITE) #define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) #define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) #define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) #define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) #define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) #define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) #define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) #define CAP_RECV CAP_READ #define CAP_SEND CAP_WRITE #define CAP_SOCK_CLIENT \ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) #define CAP_SOCK_SERVER \ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ CAP_SETSOCKOPT | CAP_SHUTDOWN) Added defines for backward API compatibility: #define CAP_MAPEXEC CAP_MMAP_X #define CAP_DELETE CAP_UNLINKAT #define CAP_MKDIR CAP_MKDIRAT #define CAP_RMDIR CAP_UNLINKAT #define CAP_MKFIFO CAP_MKFIFOAT #define CAP_MKNOD CAP_MKNODAT #define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) Sponsored by: The FreeBSD Foundation Reviewed by: Christoph Mallon <christoph.mallon@gmx.de> Many aspects discussed with: rwatson, benl, jonathan ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
; 533: cap_rights_limit takes a descriptor and a cap_rights_t pointer,
; rightsp; audited as AUE_CAP_RIGHTS_LIMIT.
533 AUE_CAP_RIGHTS_LIMIT STD { int cap_rights_limit(int fd, \
Change the cap_rights_t type from uint64_t to a structure that we can extend in the future in a backward compatible (API and ABI) way. The cap_rights_t represents capability rights. We used to use one bit to represent one right, but we are running out of spare bits. Currently the new structure provides place for 114 rights (so 50 more than the previous cap_rights_t), but it is possible to grow the structure to hold at least 285 rights, although we can make it even larger if 285 rights won't be enough. The structure definition looks like this: struct cap_rights { uint64_t cr_rights[CAP_RIGHTS_VERSION + 2]; }; The initial CAP_RIGHTS_VERSION is 0. The top two bits in the first element of the cr_rights[] array contain total number of elements in the array - 2. This means if those two bits are equal to 0, we have 2 array elements. The top two bits in all remaining array elements should be 0. The next five bits in all array elements contain array index. Only one bit is used and bit position in this five-bits range defines array index. This means there can be at most five array elements in the future. To define new right the CAPRIGHT() macro must be used. The macro takes two arguments - an array index and a bit to set, eg. 
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL) We still support aliases that combine few rights, but the rights have to belong to the same array element, eg: #define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL) #define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL) #define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP) There is new API to manage the new cap_rights_t structure: cap_rights_t *cap_rights_init(cap_rights_t *rights, ...); void cap_rights_set(cap_rights_t *rights, ...); void cap_rights_clear(cap_rights_t *rights, ...); bool cap_rights_is_set(const cap_rights_t *rights, ...); bool cap_rights_is_valid(const cap_rights_t *rights); void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src); void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src); bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little); Capability rights to the cap_rights_init(), cap_rights_set(), cap_rights_clear() and cap_rights_is_set() functions are provided by separating them with commas, eg: cap_rights_t rights; cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT); There is no need to terminate the list of rights, as those functions are actually macros that take care of the termination, eg: #define cap_rights_set(rights, ...) \ __cap_rights_set((rights), __VA_ARGS__, 0ULL) void __cap_rights_set(cap_rights_t *rights, ...); Thanks to using one bit as an array index we can assert in those functions that there are no two rights belonging to different array elements provided together. For example this is illegal and will be detected, because CAP_LOOKUP belongs to element 0 and CAP_PDKILL to element 1: cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL); Providing several rights that belongs to the same array's element this way is correct, but is not advised. It should only be used for aliases definition. This commit also breaks compatibility with some existing Capsicum system calls, but I see no other way to do that. 
This should be fine as Capsicum is still experimental and this change is not going to 9.x. Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
cap_rights_t *rightsp); }
Merge Capsicum overhaul: - Capability is no longer separate descriptor type. Now every descriptor has set of its own capability rights. - The cap_new(2) system call is left, but it is no longer documented and should not be used in new code. - The new syscall cap_rights_limit(2) should be used instead of cap_new(2), which limits capability rights of the given descriptor without creating a new one. - The cap_getrights(2) syscall is renamed to cap_rights_get(2). - If CAP_IOCTL capability right is present we can further reduce allowed ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed ioctls can be retrieved with cap_ioctls_get(2) syscall. - If CAP_FCNTL capability right is present we can further reduce fcntls that can be used with the new cap_fcntls_limit(2) syscall and retrieve them with cap_fcntls_get(2). - To support ioctl and fcntl white-listing the filedesc structure was heavily modified. - The audit subsystem, kdump and procstat tools were updated to recognize new syscalls. - Capability rights were revised and even though I tried hard to provide backward API and ABI compatibility there are some incompatible changes that are described in detail below: CAP_CREATE old behaviour: - Allow for openat(2)+O_CREAT. - Allow for linkat(2). - Allow for symlinkat(2). CAP_CREATE new behaviour: - Allow for openat(2)+O_CREAT. Added CAP_LINKAT: - Allow for linkat(2). ABI: Reuses CAP_RMDIR bit. - Allow to be target for renameat(2). Added CAP_SYMLINKAT: - Allow for symlinkat(2). Removed CAP_DELETE. Old behaviour: - Allow for unlinkat(2) when removing non-directory object. - Allow to be source for renameat(2). Removed CAP_RMDIR. Old behaviour: - Allow for unlinkat(2) when removing directory. Added CAP_RENAMEAT: - Required for source directory for the renameat(2) syscall. Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR): - Allow for unlinkat(2) on any object. - Required if target of renameat(2) exists and will be removed by this call. 
Removed CAP_MAPEXEC. CAP_MMAP old behaviour: - Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and PROT_WRITE. CAP_MMAP new behaviour: - Allow for mmap(2)+PROT_NONE. Added CAP_MMAP_R: - Allow for mmap(PROT_READ). Added CAP_MMAP_W: - Allow for mmap(PROT_WRITE). Added CAP_MMAP_X: - Allow for mmap(PROT_EXEC). Added CAP_MMAP_RW: - Allow for mmap(PROT_READ | PROT_WRITE). Added CAP_MMAP_RX: - Allow for mmap(PROT_READ | PROT_EXEC). Added CAP_MMAP_WX: - Allow for mmap(PROT_WRITE | PROT_EXEC). Added CAP_MMAP_RWX: - Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). Renamed CAP_MKDIR to CAP_MKDIRAT. Renamed CAP_MKFIFO to CAP_MKFIFOAT. Renamed CAP_MKNODE to CAP_MKNODEAT. CAP_READ old behaviour: - Allow pread(2). - Disallow read(2), readv(2) (if there is no CAP_SEEK). CAP_READ new behaviour: - Allow read(2), readv(2). - Disallow pread(2) (CAP_SEEK was also required). CAP_WRITE old behaviour: - Allow pwrite(2). - Disallow write(2), writev(2) (if there is no CAP_SEEK). CAP_WRITE new behaviour: - Allow write(2), writev(2). - Disallow pwrite(2) (CAP_SEEK was also required). 
Added convinient defines: #define CAP_PREAD (CAP_SEEK | CAP_READ) #define CAP_PWRITE (CAP_SEEK | CAP_WRITE) #define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) #define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) #define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) #define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) #define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) #define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) #define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) #define CAP_RECV CAP_READ #define CAP_SEND CAP_WRITE #define CAP_SOCK_CLIENT \ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) #define CAP_SOCK_SERVER \ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ CAP_SETSOCKOPT | CAP_SHUTDOWN) Added defines for backward API compatibility: #define CAP_MAPEXEC CAP_MMAP_X #define CAP_DELETE CAP_UNLINKAT #define CAP_MKDIR CAP_MKDIRAT #define CAP_RMDIR CAP_UNLINKAT #define CAP_MKFIFO CAP_MKFIFOAT #define CAP_MKNOD CAP_MKNODAT #define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) Sponsored by: The FreeBSD Foundation Reviewed by: Christoph Mallon <christoph.mallon@gmx.de> Many aspects discussed with: rwatson, benl, jonathan ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
; 534-537: per-descriptor ioctl and fcntl limits.  The *_limit calls take
; the set to allow (a const u_long array with its count, or a uint32_t of
; fcntl rights); the *_get calls take a pointer instead.  cap_ioctls_get is
; declared with an ssize_t return type rather than int.
534 AUE_CAP_IOCTLS_LIMIT STD { int cap_ioctls_limit(int fd, \
const u_long *cmds, size_t ncmds); }
535 AUE_CAP_IOCTLS_GET STD { ssize_t cap_ioctls_get(int fd, \
u_long *cmds, size_t maxcmds); }
536 AUE_CAP_FCNTLS_LIMIT STD { int cap_fcntls_limit(int fd, \
uint32_t fcntlrights); }
537 AUE_CAP_FCNTLS_GET STD { int cap_fcntls_get(int fd, \
uint32_t *fcntlrightsp); }
; 538-539: bindat and connectat share one argument list: an int fd, an int
; s, a caddr_t name and an int namelen.  Each has its own audit event.
538 AUE_BINDAT STD { int bindat(int fd, int s, caddr_t name, \
int namelen); }
539 AUE_CONNECTAT STD { int connectat(int fd, int s, caddr_t name, \
int namelen); }
; 540: chflagsat takes a descriptor, a const path, u_long flags and an int
; atflag; audited as AUE_CHFLAGSAT.
540 AUE_CHFLAGSAT STD { int chflagsat(int fd, const char *path, \
u_long flags, int atflag); }
; 541-542: accept4 and pipe2 are audited with the AUE_ACCEPT and AUE_PIPE
; events respectively; each takes an int flags as its last argument.
; accept4 declares both of its pointer arguments __restrict.
541 AUE_ACCEPT STD { int accept4(int s, \
struct sockaddr * __restrict name, \
__socklen_t * __restrict anamelen, \
int flags); }
542 AUE_PIPE STD { int pipe2(int *fildes, int flags); }
; 543-544: neither call has an audit event (AUE_NULL).  aio_mlock takes a
; struct aiocb pointer; procctl identifies its target with an (idtype, id)
; pair and takes an int com plus an untyped data pointer.
543 AUE_NULL STD { int aio_mlock(struct aiocb *aiocbp); }
544 AUE_NULL STD { int procctl(idtype_t idtype, id_t id, \
int com, void *data); }
; 545: ppoll is audited with AUE_POLL.  After the pollfd pointer and u_int
; nfds it takes a const timespec pointer and a const sigset_t pointer.
545 AUE_POLL STD { int ppoll(struct pollfd *fds, u_int nfds, \
const struct timespec *ts, \
const sigset_t *set); }
; 546-547: futimens and utimensat both take a struct timespec pointer,
; times, and are audited with AUE_FUTIMES and AUE_FUTIMESAT respectively.
; utimensat additionally takes a path and an int flag.
546 AUE_FUTIMES STD { int futimens(int fd, \
struct timespec *times); }
547 AUE_FUTIMESAT STD { int utimensat(int fd, \
char *path, \
struct timespec *times, int flag); }
2015-07-11 15:22:11 +00:00
; 548-549: numa_getaffinity and numa_setaffinity take the same (which, id,
; policy) triple; only the set variant declares policy const.  Neither has
; an audit event (AUE_NULL).
548 AUE_NULL STD { int numa_getaffinity(cpuwhich_t which, \
id_t id, \
struct vm_domain_policy_entry *policy); }
549 AUE_NULL STD { int numa_setaffinity(cpuwhich_t which, \
id_t id, \
const struct vm_domain_policy_entry *policy); }
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master