Continue to introduce Capsicum Capability Mode support:

Add a new system call flag, SYF_CAPENABLED, which indicates that a
particular system call is available in capability mode.

Add a new configuration file, kern/capabilities.conf (similar files
may be introduced for other ABIs in the future), which enumerates
system calls that are available in capability mode.  When a new
system call is added to syscalls.master, it will also need to be
added here (if needed).  Teach sysent parts to use this file to set
values for SYF_CAPENABLED for the native ABI.

Reviewed by:	anderson
Discussed with:	benl, kris, pjd
Obtained from:	Capsicum Project
MFC after:	3 months
This commit is contained in:
Robert Watson 2011-03-01 13:28:27 +00:00
parent 3ddc3c85ab
commit 08e6d9fad8
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=219131
4 changed files with 786 additions and 2 deletions

View File

@ -10,7 +10,8 @@ sysent: init_sysent.c syscalls.c ../sys/syscall.h ../sys/syscall.mk \
../sys/sysproto.h
init_sysent.c syscalls.c systrace_args.c ../sys/syscall.h \
../sys/syscall.mk ../sys/sysproto.h: makesyscalls.sh syscalls.master
../sys/syscall.mk ../sys/sysproto.h: makesyscalls.sh syscalls.master \
capabilities.conf
-mv -f init_sysent.c init_sysent.c.bak
-mv -f syscalls.c syscalls.c.bak
-mv -f systrace_args.c systrace_args.c.bak

756
sys/kern/capabilities.conf Normal file
View File

@ -0,0 +1,756 @@
##
## Copyright (c) 2008-2010 Robert N. M. Watson
## All rights reserved.
##
## This software was developed at the University of Cambridge Computer
## Laboratory with support from a grant from Google, Inc.
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions
## are met:
## 1. Redistributions of source code must retain the above copyright
## notice, this list of conditions and the following disclaimer.
## 2. Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimer in the
## documentation and/or other materials provided with the distribution.
##
## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
## ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
## SUCH DAMAGE.
##
## List of system calls enabled in capability mode, one name per line.
##
## Notes:
## - sys_exit(2), abort2(2) and close(2) are very important.
## - Sorted alphabetically, please keep it that way.
##
## $FreeBSD$
##
##
## Allow ACL and MAC label operations by file descriptor, subject to
## capability rights. Allow MAC label operations on the current process but
## we will need to scope __mac_get_pid(2).
##
__acl_aclcheck_fd
__acl_delete_fd
__acl_get_fd
__acl_set_fd
__mac_get_fd
#__mac_get_pid
__mac_get_proc
__mac_set_fd
__mac_set_proc
##
## Allow sysctl(2) as we scope internal to the call; this is a global
## namespace, but there are several critical sysctls required for almost
## anything to run, such as hw.pagesize. For now that policy lives in the
## kernel for performance and simplicity, but perhaps it could move to a
## proxying daemon in userspace.
##
__sysctl
##
## Allow umtx operations as these are scoped by address space.
##
## XXRW: Need to check this very carefully.
##
_umtx_lock
_umtx_op
_umtx_unlock
##
## Allow process termination using abort2(2).
##
abort2
##
## Allow accept(2) since it doesn't manipulate namespaces directly, rather
## relies on existing bindings on a socket, subject to capability rights.
##
accept
##
## Allow AIO operations by file descriptor, subject to capability rights.
##
aio_cancel
aio_error
aio_fsync
aio_read
aio_return
aio_suspend
aio_waitcomplete
aio_write
##
## audit(2) is a global operation, submitting to the global trail, but it is
## controlled by privilege, and it might be useful to be able to submit
## records from sandboxes. For now, disallow, but we may want to think about
## providing some sort of proxy service for this.
##
#audit
##
## Disllow bind(2) for now, even though we support CAP_BIND.
##
## XXXRW: Revisit this.
##
#bind
##
## Allow capability mode and capability system calls.
##
cap_enter
cap_getmode
cap_getrights
cap_new
##
## Allow read-only clock operations.
##
clock_gettime
clock_getres
##
## Always allow file descriptor close(2).
##
close
closefrom
##
## Disallow connect(2) for now, despite CAP_CONNECT.
##
## XXXRW: Revisit this.
##
#connect
##
## cpuset(2) and related calls require scoping by process, but should
## eventually be allowed, at least in the current process case.
##
#cpuset
#cpuset_getaffinity
#cpuset_getid
#cpuset_setaffinity
#cpuset_setid
##
## Always allow dup(2) and dup2(2) manipulation of the file descriptor table.
##
dup
dup2
##
## Allow extended attribute operations by file descriptor, subject to
## capability rights.
##
extattr_delete_fd
extattr_get_fd
extattr_list_fd
extattr_set_fd
##
## Allow changing file flags, mode, and owner by file descriptor, subject to
## capability rights.
##
fchflags
fchmod
fchown
##
## For now, allow fcntl(2), subject to capability rights, but this probably
## needs additional scoping.
##
fcntl
##
## Allow fexecve(2), subject to capability rights. We perform some scoping,
## such as disallowing privilege escalation.
##
fexecve
##
## Allow flock(2), subject to capability rights.
##
flock
##
## Allow fork(2), even though it returns pids -- some applications seem to
## prefer this interface.
##
fork
##
## Allow fpathconf(2), subject to capability rights.
##
fpathconf
##
## Allow various file descriptor-based I/O operations, subject to capability
## rights. mmap(2) requires further attention.
##
freebsd6_ftruncate
freebsd6_lseek
freebsd6_mmap
freebsd6_pread
freebsd6_pwrite
##
## Allow querying file and file system state with fstat(2) and fstatfs(2),
## subject to capability rights.
##
fstat
fstatfs
##
## Allow further file descriptor-based I/O operations, subject to capability
## rights.
##
fsync
ftruncate
##
## Allow futimes(2), subject to capability rights.
##
futimes
##
## Allow querying process audit state, subject to normal access control.
##
getaudit
getaudit_addr
getauid
##
## Allow thread context management with getcontext(2).
##
getcontext
##
## Allow directory I/O on a file descriptor, subject to capability rights.
## Originally we had separate capabilities for directory-specific read
## operations, but on BSD we allow reading the raw directory data, so we just
## rely on CAP_READ (etc) now.
##
## XXXRW: Possibly these should also use CAP_SEEK.
##
getdents
getdirentries
##
## Allow querying certain trivial global state.
##
getdomainname
##
## Allow querying current process credential state.
##
getegid
geteuid
##
## Allow querying certain trivial global state.
##
gethostid
gethostname
##
## Allow querying per-process timer.
##
getitimer
##
## Allow querying current process credential state.
##
getgid
getgroups
getlogin
##
## Allow querying certain trivial global state.
##
getpagesize
getpeername
##
## Allow querying certain per-process scheduling, resource limit, and
## credential state.
##
## XXXRW: getpgid(2) needs scoping. It's not clear if it's worth scoping
## getppid(2). getpriority(2) needs scoping. getrusage(2) needs scoping.
## getsid(2) needs scoping.
##
getpgid
getpgrp
getpid
getppid
getpriority
getresgid
getresuid
getrlimit
getrusage
getsid
##
## Allow querying socket state, subject to capability rights.
##
## XXXRW: getsockopt(2) may need more attention.
##
getsockname
getsockopt
##
## Allow querying the global clock.
##
gettimeofday
##
## Allow querying current process credential state.
##
getuid
##
## Disallow ioctl(2) for now, as frequently ioctl(2) operations have global
## scope, but this is a tricky one as it is also required for tty control.
## We do have a capability right for this operation.
##
## XXXRW: This needs to be revisited.
##
#ioctl
##
## Allow querying current process credential state.
##
issetugid
##
## Allow kevent(2), as we will authorize based on capability rights on the
## target descriptor.
##
## XXXRW: Do we do this?
##
kevent
##
## Allow message queue operations on file descriptors, subject to capability
## rights.
##
kmq_notify
kmq_setattr
kmq_timedreceive
kmq_timedsend
##
## Allow kqueue(2), we will control use.
##
kqueue
##
## Allow managing per-process timers.
##
ktimer_create
ktimer_delete
ktimer_getoverrun
ktimer_gettime
ktimer_settime
##
## We can't allow ktrace(2) because it relies on a global namespace, but we
## might want to introduce an fktrace(2) of some sort.
##
#ktrace
##
## Allow AIO operations by file descriptor, subject to capability rights.
##
lio_listio
##
## Allow listen(2), subject to capability rights.
##
## XXXRW: One might argue this manipulates a global namespace.
##
listen
##
## Allow I/O-related file descriptors, subject to capability rights.
##
lseek
##
## Allow MAC label operations by file descriptor, subject to capability
## rights.
##
mac_get_fd
mac_set_fd
##
## Allow simple VM operations on the current process.
##
madvise
mincore
minherit
mlock
mlockall
##
## Allow memory mapping a file descriptor, and updating protections, subject
## to capability rights.
##
## XXXRW: We currently don't properly mask VM protections using capability
## rights.
##
mmap
mprotect
##
## Allow simple VM operations on the current process.
##
msync
munlock
munlockall
munmap
##
## Allow the current process to sleep.
##
nanosleep
##
## Allow querying the global clock.
##
ntp_gettime
##
## Allow AIO operations by file descriptor, subject to capability rights.
##
oaio_read
oaio_write
##
## Allow simple VM operations on the current process.
##
obreak
##
## Allow AIO operations by file descriptor, subject to capability rights.
##
olio_listio
##
## Once Capsicum is fully merged, some of the *at(2) calls which can be
## semantically constrained will be permitted in capability mode. For now,
## we will simply not allow them to be called.
##
#faccessat
#fstatat
#fchmodat
#futimesat
#mkdirat
#rmdirat
#mkfifoat
#mknodat
#openat
#renameat
##
## ONCE CAPSICUM IS FULLY MERGED:
## Allow entry into open(2). This system call will fail, since access to the global
## file namespace has been disallowed, but allowing entry into the syscall means
## that an audit trail will be generated (which is also very useful for debugging),
##
#open
##
## Allow poll(2), which will be scoped by capability rights.
##
## XXXRW: Perhaps we don't need the OpenBSD version?
## XXXRW: We don't yet do that scoping.
##
openbsd_poll
##
## Process descriptor-related system calls are allowed.
##
pdfork
pdgetpid
pdkill
pdwait4
##
## Allow pipe(2).
##
pipe
##
## Allow poll(2), which will be scoped by capability rights.
## XXXRW: We don't yet do that scoping.
##
poll
##
## Allow I/O-related file descriptors, subject to capability rights.
##
pread
preadv
##
## Allow access to profiling state on the current process.
##
profil
##
## Disallow ptrace(2) for now, but we do need debugging facilities in
## capability mode, so we will want to revisit this, possibly by scoping its
## operation.
##
#ptrace
##
## Allow I/O-related file descriptors, subject to capability rights.
##
pwrite
pwritev
read
readv
recv
recvfrom
recvmsg
##
## Allow real-time scheduling primitives to be used.
##
## XXXRW: These require scoping.
##
rtprio
rtprio_thread
##
## Allow simple VM operations on the current process.
##
sbrk
##
## Allow querying trivial global scheduler state.
##
sched_get_priority_max
sched_get_priority_min
##
## Allow various thread/process scheduler operations.
##
## XXXRW: Some of these require further scoping.
##
sched_getparam
sched_getscheduler
sched_rr_getinterval
sched_setparam
sched_setscheduler
sched_yield
##
## Allow I/O-related file descriptors, subject to capability rights.
##
sctp_generic_recvmsg
sctp_generic_sendmsg
sctp_generic_sendmsg_iov
sctp_peeloff
##
## Allow select(2), which will be scoped by capability rights.
##
## XXXRW: But is it?
##
select
##
## Allow I/O-related file descriptors, subject to capability rights. Use of
## explicit addresses here is restricted by the system calls themselves.
##
send
sendfile
sendmsg
sendto
##
## Allow setting per-process audit state, which is controlled separately by
## privileges.
##
setaudit
setaudit_addr
setauid
##
## Allow setting thread context.
##
setcontext
##
## Allow setting current process credential state, which is controlled
## separately by privilege.
##
setegid
seteuid
setgid
##
## Allow use of the process interval timer.
##
setitimer
##
## Allow setpriority(2).
##
## XXXRW: Requires scoping.
##
setpriority
##
## Allow setting current process credential state, which is controlled
## separately by privilege.
##
setregid
setresgid
setresuid
setreuid
##
## Allow setting process resource limits with setrlimit(2).
##
setrlimit
##
## Allow creating a new session with setsid(2).
##
setsid
##
## Allow setting socket options with setsockopt(2), subject to capability
## rights.
##
## XXXRW: Might require scoping.
##
setsockopt
##
## Allow setting current process credential state, which is controlled
## separately by privilege.
##
setuid
##
## ONCE CAPSICUM IS FULLY MERGED:
## Allow shm_open(2), which is scoped so as to allow only access to new
## anonymous objects.
##
#shm_open
##
## Allow I/O-related file descriptors, subject to capability rights.
##
shutdown
##
## Allow signal control on current process.
##
sigaction
sigaltstack
sigblock
sigpending
sigprocmask
sigqueue
sigreturn
sigsetmask
sigstack
sigsuspend
sigtimedwait
sigvec
sigwaitinfo
##
## Allow creating new socket pairs with socket(2) and socketpair(2).
##
socket
socketpair
##
## Allow simple VM operations on the current process.
##
## XXXRW: Kernel doesn't implement this, so drop?
##
sstk
##
## Do allow sync(2) for now, but possibly shouldn't.
##
sync
##
## Always allow process termination with sys_exit(2).
##
sys_exit
##
## sysarch(2) does rather diverse things, but is required on at least i386
## in order to configure per-thread data. As such, it's scoped on each
## architecture.
##
sysarch
##
## Allow thread operations operating only on current process.
##
thr_create
thr_exit
thr_kill
##
## Disallow thr_kill2(2), as it may operate beyond the current process.
##
## XXXRW: Requires scoping.
##
#thr_kill2
##
## Allow thread operations operating only on current process.
##
thr_new
thr_self
thr_set_name
thr_suspend
thr_wake
##
## Allow manipulation of the current process umask with umask(2).
##
umask
##
## Allow submitting of process trace entries with utrace(2).
##
utrace
##
## Allow generating UUIDs with uuidgen(2).
##
uuidgen
##
## Allow I/O-related file descriptors, subject to capability rights.
##
write
writev
##
## Allow processes to yield(2).
##
yield

View File

@ -39,6 +39,13 @@ sysarg="sysarg.switch.$$"
sysprotoend="sysprotoend.$$"
systracetmp="systrace.$$"
if [ -r capabilities.conf ]; then
capenabled=`cat capabilities.conf | grep -v "^#" | grep -v "^$"`
capenabled=`echo $capenabled | sed 's/ /,/g'`
else
capenabled=""
fi
trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp" 0
touch $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp
@ -97,8 +104,11 @@ s/\$//g
switchname = \"$switchname\"
namesname = \"$namesname\"
infile = \"$1\"
capenabled_string = \"$capenabled\"
"'
split(capenabled_string, capenabled, ",");
printf "/*\n * System call switch table.\n *\n" > syssw
printf " * DO NOT EDIT-- this file is automatically generated.\n" > syssw
printf " * $%s$\n", "FreeBSD" > syssw
@ -290,6 +300,18 @@ s/\$//g
f++ #function return type
funcname=$f
#
# We now know the func name, so define a flags field for it.
# Do this before any other processing as we may return early
# from it.
#
for (cap in capenabled) {
if (funcname == capenabled[cap]) {
flags = "SYF_CAPENABLED";
}
}
if (funcalias == "")
funcalias = funcname
if (argalias == "") {
@ -348,7 +370,7 @@ s/\$//g
}
#
# The currently-empty flags field.
# The flags, if any.
#
{
flags = "0";

View File

@ -65,6 +65,11 @@ struct sysent { /* system call table */
u_int32_t sy_thrcnt;
};
/*
* A system call is permitted in capability mode.
*/
#define SYF_CAPENABLED 0x00000001
#define SY_THR_FLAGMASK 0x7
#define SY_THR_STATIC 0x1
#define SY_THR_DRAINING 0x2