Add a new file descriptor type for IPC shared memory objects and use it to

implement shm_open(2) and shm_unlink(2) in the kernel:
- Each shared memory file descriptor is associated with a swap-backed vm
  object which provides the backing store.  Each descriptor starts off with
  a size of zero, but the size can be altered via ftruncate(2).  The shared
  memory file descriptors also support fstat(2).  read(2), write(2),
  ioctl(2), select(2), poll(2), and kevent(2) are not supported on shared
  memory file descriptors.
- shm_open(2) and shm_unlink(2) are now implemented as system calls that
  manage shared memory file descriptors.  The virtual namespace that maps
  pathnames to shared memory file descriptors is implemented as a hash
  table where the hash key is generated via the 32-bit Fowler/Noll/Vo hash
  of the pathname.
- As an extension, the constant 'SHM_ANON' may be specified in place of the
  path argument to shm_open(2).  In this case, an unnamed shared memory
  file descriptor will be created similar to the IPC_PRIVATE key for
  shmget(2).  Note that the shared memory object can still be shared among
  processes by sharing the file descriptor via fork(2) or sendmsg(2), but
  it is unnamed.  This effectively serves to implement the getmemfd() idea
  bandied about the lists several times over the years.
- The backing store for a shared memory file descriptor is garbage
  collected when it is no longer referenced by any open file descriptors
  or by the shm_open(2) virtual namespace.

Submitted by:	dillon, peter (previous versions)
Submitted by:	rwatson (I based this on his version)
Reviewed by:	alc (suggested converting getmemfd() to shm_open())
This commit is contained in:
John Baldwin 2008-01-08 21:58:16 +00:00
parent 3b2262e488
commit 8e38aeff17
19 changed files with 1220 additions and 350 deletions

View File

@ -21,7 +21,7 @@ SRCS+= __getosreldate.c __xuname.c \
initgroups.c isatty.c isinf.c isnan.c jrand48.c lcong48.c \
lockf.c lrand48.c mrand48.c nftw.c nice.c \
nlist.c nrand48.c opendir.c \
pause.c pmadvise.c popen.c posixshm.c pselect.c \
pause.c pmadvise.c popen.c pselect.c \
psignal.c pw_scan.c pwcache.c \
raise.c readdir.c readpassphrase.c rewinddir.c \
scandir.c seed48.c seekdir.c sem.c semctl.c \
@ -59,7 +59,7 @@ MAN+= alarm.3 arc4random.3 \
raise.3 rand48.3 readpassphrase.3 rfork_thread.3 \
scandir.3 sem_destroy.3 sem_getvalue.3 sem_init.3 \
sem_open.3 sem_post.3 sem_wait.3 \
setjmp.3 setmode.3 setproctitle.3 shm_open.3 \
setjmp.3 setmode.3 setproctitle.3 \
siginterrupt.3 signal.3 sigsetops.3 sleep.3 \
statvfs.3 stringlist.3 \
strtofflags.3 sysconf.3 sysctl.3 syslog.3 tcgetpgrp.3 \
@ -133,7 +133,6 @@ MLINKS+=setjmp.3 _longjmp.3 setjmp.3 _setjmp.3 setjmp.3 longjmp.3 \
setjmp.3 longjmperr.3 setjmp.3 longjmperror.3 \
setjmp.3 siglongjmp.3 setjmp.3 sigsetjmp.3
MLINKS+=setmode.3 getmode.3
MLINKS+=shm_open.3 shm_unlink.3
MLINKS+=sigsetops.3 sigaddset.3 sigsetops.3 sigdelset.3 \
sigsetops.3 sigemptyset.3 sigsetops.3 sigfillset.3 \
sigsetops.3 sigismember.3

View File

@ -1,72 +0,0 @@
/*
* Copyright 2000 Massachusetts Institute of Technology
*
* Permission to use, copy, modify, and distribute this software and
* its documentation for any purpose and without fee is hereby
* granted, provided that both the above copyright notice and this
* permission notice appear in all copies, that both the above
* copyright notice and this permission notice appear in all
* supporting documentation, and that the name of M.I.T. not be used
* in advertising or publicity pertaining to distribution of the
* software without specific, written prior permission. M.I.T. makes
* no representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
*
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "namespace.h"
#include <sys/types.h>
#include <sys/fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <errno.h>
#include <unistd.h>
#include "un-namespace.h"
/*
 * Open (or optionally create) the POSIX shared memory object named by
 * path.  The object is an ordinary file opened through _open(); the
 * descriptor is then checked to be a regular file and tagged with
 * FPOSIXSHM via _fcntl().
 *
 * Returns the new descriptor on success.  On failure returns -1 with
 * errno set; EINVAL is reported for an O_WRONLY access mode and for a
 * path that does not refer to a regular file.
 */
int
shm_open(const char *path, int flags, mode_t mode)
{
	struct stat stab;
	int fd, serrno;

	/*
	 * O_WRONLY is rejected.  Report it through errno and a -1 return:
	 * handing back the raw EINVAL value would look like a valid
	 * descriptor to callers that test the result against -1.
	 */
	if ((flags & O_ACCMODE) == O_WRONLY) {
		errno = EINVAL;
		return (-1);
	}

	fd = _open(path, flags, mode);
	if (fd == -1)
		return (-1);

	if (_fstat(fd, &stab) != 0 || !S_ISREG(stab.st_mode)) {
		_close(fd);
		errno = EINVAL;
		return (-1);
	}
	if (_fcntl(fd, F_SETFL, (int)FPOSIXSHM) != 0) {
		/* Keep the _fcntl() error; _close() may overwrite errno. */
		serrno = errno;
		_close(fd);
		errno = serrno;
		return (-1);
	}
	return (fd);
}
/*
 * Remove the shared memory object named by path.  This is a thin
 * wrapper: the result of unlink() is handed back unchanged.
 */
int
shm_unlink(const char *path)
{
	int rv;

	rv = unlink(path);
	return (rv);
}

View File

@ -1,192 +0,0 @@
.\"
.\" Copyright 2000 Massachusetts Institute of Technology
.\"
.\" Permission to use, copy, modify, and distribute this software and
.\" its documentation for any purpose and without fee is hereby
.\" granted, provided that both the above copyright notice and this
.\" permission notice appear in all copies, that both the above
.\" copyright notice and this permission notice appear in all
.\" supporting documentation, and that the name of M.I.T. not be used
.\" in advertising or publicity pertaining to distribution of the
.\" software without specific, written prior permission. M.I.T. makes
.\" no representations about the suitability of this software for any
.\" purpose. It is provided "as is" without express or implied
.\" warranty.
.\"
.\" THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
.\" ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
.\" INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
.\" MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
.\" SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
.\" SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
.\" LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
.\" USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
.\" ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
.\" OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
.\" OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd March 24, 2000
.Dt SHM_OPEN 3
.Os
.Sh NAME
.Nm shm_open , shm_unlink
.Nd "shared memory object operations"
.Sh LIBRARY
.Lb libc
.Sh SYNOPSIS
.In sys/types.h
.In sys/mman.h
.Ft int
.Fn shm_open "const char *path" "int flags" "mode_t mode"
.Ft int
.Fn shm_unlink "const char *path"
.Sh DESCRIPTION
The
.Fn shm_open
function opens (or optionally creates) a
.Tn POSIX
shared memory object named
.Fa path .
The
.Fn shm_unlink
function removes a shared memory object named
.Fa path .
.Pp
In the
.Fx
implementation,
.Tn POSIX
shared memory objects are implemented as ordinary files.
The
.Fn shm_open
and
.Fn shm_unlink
act as wrappers around the
.Xr open 2
and
.Xr unlink 2
routines, and
.Fa path ,
.Fa flags ,
and
.Fa mode
arguments are as specified for those functions.
The
.Fa flags
argument is checked to ensure that the access mode specified is not
.Dv O_WRONLY
(which is not defined for shared memory objects).
.Pp
In addition, the
.Fx
implementation causes
.Fn mmap
of a descriptor returned by
.Fn shm_open
to behave as if the
.Dv MAP_NOSYNC
flag had been specified to
.Xr mmap 2 .
(It does so by setting a special file flag using
.Xr fcntl 2 . )
.Pp
The
.Fn shm_unlink
function makes no effort to ensure that
.Fa path
refers to a shared memory object.
.Sh RETURN VALUES
If successful,
.Fn shm_open
returns a non-negative integer;
.Fn shm_unlink
returns zero.
Both functions return -1 on failure, and set
.Va errno
to indicate the error.
.Sh COMPATIBILITY
The
.Fa path
argument does not necessarily represent a pathname (although it does in this
and most other implementations).
Two processes opening the same
.Fa path
are guaranteed to access the same shared memory object if and only if
.Fa path
begins with a slash
.Pq Ql \&/
character.
.Pp
Only the
.Dv O_RDONLY ,
.Dv O_RDWR ,
.Dv O_CREAT ,
.Dv O_EXCL ,
and
.Dv O_TRUNC
flags may be used in portable programs.
.Pp
The result of using
.Xr open 2 ,
.Xr read 2 ,
or
.Xr write 2
on a shared memory object, or on the descriptor returned by
.Fn shm_open ,
is undefined.
It is also undefined whether the shared memory object itself, or its
contents, persist across reboots.
.Sh ERRORS
The
.Fn shm_open
and
.Fn shm_unlink
functions can fail with any error defined for
.Fn open
and
.Fn unlink ,
respectively.
In addition, the following errors are defined for
.Fn shm_open :
.Bl -tag -width Er
.It Bq Er EINVAL
The object named by
.Fa path
is not a shared memory object
(i.e., it is not a regular file).
.It Bq Er EINVAL
The
.Fa flags
argument to
.Fn shm_open
specifies an access mode of
.Dv O_WRONLY .
.El
.Sh SEE ALSO
.Xr mmap 2 ,
.Xr munmap 2 ,
.Xr open 2 ,
.Xr unlink 2
.Sh STANDARDS
The
.Fn shm_open
and
.Fn shm_unlink
functions are believed to conform to
.St -p1003.1b-93 .
.Sh HISTORY
The
.Fn shm_open
and
.Fn shm_unlink
functions first appeared in
.Fx 4.3 .
.Sh AUTHORS
.An Garrett A. Wollman Aq wollman@FreeBSD.org
(C library support and this manual page)
.Pp
.An Matthew Dillon Aq dillon@FreeBSD.org
.Pq Dv MAP_NOSYNC

View File

@ -83,7 +83,7 @@ MAN+= _exit.2 abort2.2 accept.2 access.2 acct.2 adjtime.2 \
read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
rtprio.2 select.2 semctl.2 semget.2 semop.2 send.2 sendfile.2 \
setgroups.2 setpgid.2 setregid.2 setresuid.2 setreuid.2 setsid.2 \
setuid.2 shmat.2 shmctl.2 shmget.2 shutdown.2 \
setuid.2 shmat.2 shmctl.2 shmget.2 shm_open.2 shutdown.2 \
sigaction.2 sigaltstack.2 sigpending.2 sigprocmask.2 sigqueue.2 \
sigreturn.2 sigstack.2 sigsuspend.2 sigwait.2 sigwaitinfo.2 \
socket.2 socketpair.2 stat.2 statfs.2 \
@ -154,6 +154,7 @@ MLINKS+=setpgid.2 setpgrp.2
MLINKS+=setresuid.2 setresgid.2 setresuid.2 getresuid.2 setresuid.2 getresgid.2
MLINKS+=setuid.2 setegid.2 setuid.2 seteuid.2 setuid.2 setgid.2
MLINKS+=shmat.2 shmdt.2
MLINKS+=shm_open.2 shm_unlink.2
MLINKS+=sigwaitinfo.2 sigtimedwait.2
MLINKS+=stat.2 fstat.2 stat.2 lstat.2
MLINKS+=statfs.2 fstatfs.2

View File

@ -28,8 +28,8 @@
.\"
.\" $FreeBSD$
.\"
.Dd March 24, 2000
.Dt SHM_OPEN 3
.Dd March 20, 2007
.Dt SHM_OPEN 2
.Os
.Sh NAME
.Nm shm_open , shm_unlink
@ -46,62 +46,104 @@
.Sh DESCRIPTION
The
.Fn shm_open
function opens (or optionally creates) a
system call opens (or optionally creates) a
.Tn POSIX
shared memory object named
.Fa path .
The
.Fa flags
argument contains a subset of the flags used by
.Xr open 2 .
An access mode of either
.Dv O_RDONLY
or
.Dv O_RDWR
must be included in
.Fa flags .
The optional flags
.Dv O_CREAT ,
.Dv O_EXCL ,
and
.Dv O_TRUNC
may also be specified.
.Pp
If
.Dv O_CREAT
is specified,
then a new shared memory object named
.Fa path
will be created if it does not exist.
In this case,
the shared memory object is created with mode
.Fa mode
subject to the process' umask value.
If both the
.Dv O_CREAT
and
.Dv O_EXCL
flags are specified and a shared memory object named
.Fa path
already exists,
then
.Fn shm_open
will fail with
.Er EEXIST .
.Pp
Newly created objects start off with a size of zero.
If an existing shared memory object is opened with
.Dv O_RDWR
and the
.Dv O_TRUNC
flag is specified,
then the shared memory object will be truncated to a size of zero.
The size of the object can be adjusted via
.Xr ftruncate 2
and queried via
.Xr fstat 2 .
.Pp
The new descriptor is set to close during
.Xr execve 2
system calls;
see
.Xr close 2
and
.Xr fcntl 2 .
.Pp
As a FreeBSD extension,
the constant
.Dv SHM_ANON
may be used for the
.Fa path
argument to
.Fn shm_open .
In this case, an anonymous, unnamed shared memory object is created.
Since the object has no name,
it cannot be removed via a subsequent call to
.Fn shm_unlink .
Instead,
the shared memory object will be garbage collected when the last reference to
the shared memory object is removed.
The shared memory object may be shared with other processes by sharing the
file descriptor via
.Xr fork 2
or
.Xr sendmsg 2 .
Attempting to open an anonymous shared memory object with
.Dv O_RDONLY
will fail with
.Er EINVAL .
All other flags are ignored.
.Pp
The
.Fn shm_unlink
function removes a shared memory object named
system call removes a shared memory object named
.Fa path .
.Pp
In the
.Fx
implementation,
.Tn POSIX
shared memory objects are implemented as ordinary files.
The
.Fn shm_open
and
.Fn shm_unlink
act as wrappers around the
.Xr open 2
and
.Xr unlink 2
routines, and
.Fa path ,
.Fa flags ,
and
.Fa mode
arguments are as specified for those functions.
The
.Fa flags
argument is checked to ensure that the access mode specified is not
.Dv O_WRONLY
(which is not defined for shared memory objects).
.Pp
In addition, the
.Fx
implementation causes
.Fn mmap
of a descriptor returned by
.Fn shm_open
to behave as if the
.Dv MAP_NOSYNC
flag had been specified to
.Xr mmap 2 .
(It does so by setting a special file flag using
.Xr fcntl 2 . )
.Pp
The
.Fn shm_unlink
function makes no effort to ensure that
.Fa path
refers to a shared memory object.
.Sh RETURN VALUES
If successful,
.Fn shm_open
returns a non-negative integer;
returns a non-negative integer,
and
.Fn shm_unlink
returns zero.
Both functions return -1 on failure, and set
@ -110,8 +152,8 @@ to indicate the error.
.Sh COMPATIBILITY
The
.Fa path
argument does not necessarily represent a pathname (although it does in this
and most other implementations).
argument does not necessarily represent a pathname (although it does in
most other implementations).
Two processes opening the same
.Fa path
are guaranteed to access the same shared memory object if and only if
@ -139,37 +181,82 @@ on a shared memory object, or on the descriptor returned by
is undefined.
It is also undefined whether the shared memory object itself, or its
contents, persist across reboots.
.Pp
In FreeBSD,
.Xr read 2
and
.Xr write 2
on a shared memory object will fail with
.Er EOPNOTSUPP
and neither shared memory objects nor their contents persist across reboots.
.Sh ERRORS
The
.Fn shm_open
and
.Fn shm_unlink
functions can fail with any error defined for
.Fn open
and
.Fn unlink ,
respectively.
In addition, the following errors are defined for
The following errors are defined for
.Fn shm_open :
.Bl -tag -width Er
.It Bq Er EINVAL
The object named by
A flag other than
.Dv O_RDONLY ,
.Dv O_RDWR ,
.Dv O_CREAT ,
.Dv O_EXCL ,
or
.Dv O_TRUNC
was included in
.Fa flags .
.It Bq Er EMFILE
The process has already reached its limit for open file descriptors.
.It Bq Er ENFILE
The system file table is full.
.It Bq Er EINVAL
.Dv O_RDONLY
was specified while creating an anonymous shared memory object via
.Dv SHM_ANON .
.It Bq Er EFAULT
The
.Fa path
is not a shared memory object
(i.e., it is not a regular file).
argument points outside the process' allocated address space.
.It Bq Er ENAMETOOLONG
The entire pathname exceeded 1023 characters.
.It Bq Er EINVAL
The
.Fa flags
argument to
.Fn shm_open
specifies an access mode of
.Dv O_WRONLY .
.Fa path
does not begin with a slash
.Pq Ql \&/
character.
.It Bq Er ENOENT
.Dv O_CREAT
is not specified and the named shared memory object does not exist.
.It Bq Er EEXIST
.Dv O_CREAT
and
.Dv O_EXCL
are specified and the named shared memory object already exists.
.It Bq Er EACCES
The required permissions (for reading or reading and writing) are denied.
.El
.Pp
The following errors are defined for
.Fn shm_unlink :
.Bl -tag -width Er
.It Bq Er EFAULT
The
.Fa path
argument points outside the process' allocated address space.
.It Bq Er ENAMETOOLONG
The entire pathname exceeded 1023 characters.
.It Bq Er ENOENT
The named shared memory object does not exist.
.It Bq Er EACCES
The required permissions are denied.
.Fn shm_unlink
requires write permission to the shared memory object.
.El
.Sh SEE ALSO
.Xr close 2 ,
.Xr ftruncate 2 ,
.Xr fstat 2 ,
.Xr mmap 2 ,
.Xr munmap 2 ,
.Xr open 2 ,
.Xr unlink 2
.Xr munmap 2
.Sh STANDARDS
The
.Fn shm_open
@ -184,6 +271,9 @@ and
.Fn shm_unlink
functions first appeared in
.Fx 4.3 .
The functions were reimplemented as system calls using shared memory objects
directly rather than files in
.Fx 7.0 .
.Sh AUTHORS
.An Garrett A. Wollman Aq wollman@FreeBSD.org
(C library support and this manual page)

View File

@ -796,3 +796,6 @@
480 AUE_FTRUNCATE STD { int freebsd32_ftruncate(int fd, \
u_int32_t lengthlo, u_int32_t lengthhi); }
481 AUE_KILL NOPROTO { int thr_kill2(pid_t pid, long id, int sig); }
482 AUE_NULL NOPROTO { int shm_open(const char *path, int flags, \
mode_t mode); }
483 AUE_NULL NOPROTO { int shm_unlink(const char *path); }

View File

@ -1540,6 +1540,7 @@ kern/uipc_mbuf.c standard
kern/uipc_mbuf2.c standard
kern/uipc_mqueue.c optional p1003_1b_mqueue
kern/uipc_sem.c optional p1003_1b_semaphores
kern/uipc_shm.c standard
kern/uipc_sockbuf.c standard
kern/uipc_socket.c standard
kern/uipc_syscalls.c standard
@ -2046,6 +2047,7 @@ security/mac/mac_label.c optional mac
security/mac/mac_net.c optional mac
security/mac/mac_pipe.c optional mac
security/mac/mac_posix_sem.c optional mac
security/mac/mac_posix_shm.c optional mac
security/mac/mac_priv.c optional mac
security/mac/mac_process.c optional mac
security/mac/mac_socket.c optional mac

View File

@ -2646,6 +2646,8 @@ file_type_to_name(short type)
return ("crpt");
case DTYPE_MQUEUE:
return ("mque");
case DTYPE_SHM:
return ("shm");
default:
return ("unkn");
}

View File

@ -847,5 +847,8 @@
479 AUE_TRUNCATE STD { int truncate(char *path, off_t length); }
480 AUE_FTRUNCATE STD { int ftruncate(int fd, off_t length); }
481 AUE_KILL STD { int thr_kill2(pid_t pid, long id, int sig); }
482 AUE_NULL STD { int shm_open(const char *path, int flags, \
mode_t mode); }
483 AUE_NULL STD { int shm_unlink(const char *path); }
; Please copy any additions and changes to the following compatibility tables:
; sys/compat/freebsd32/syscalls.master

608
sys/kern/uipc_shm.c Normal file
View File

@ -0,0 +1,608 @@
/*-
* Copyright (c) 2006 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Support for shared swap-backed anonymous memory objects via
* shm_open(2) and shm_unlink(2). While most of the implementation is
* here, vm_mmap.c contains mapping logic changes.
*
* TODO:
*
* (1) Convert test utilities into regression tests and import them into
* src/tools/regression.
*
* (2) Need to export data to a userland tool via a sysctl. Should ipcs(1)
* and ipcrm(1) be expanded or should new tools to manage both POSIX
* kernel semaphores and POSIX shared memory be written?
*
* (3) Add support for this file type to fstat(1).
*
* (4) Resource limits? Does this need its own resource limits or are the
* existing limits in mmap(2) sufficient?
*
* (5) Partial page truncation. vnode_pager_setsize() will zero any parts
* of a partially mapped page as a result of ftruncate(2)/truncate(2).
* We can do the same (with the same pmap evil), but do we need to
* worry about the bits on disk if the page is swapped out or will the
* swapper zero the parts of a page that are invalid if the page is
* swapped back in for us?
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/fnv_hash.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <security/mac/mac_framework.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_pager.h>
#include <vm/swap_pager.h>
/*
 * One entry in the path -> shmfd dictionary.  Entries are chained on the
 * hash bucket selected by SHM_HASH(sm_fnv).
 */
struct shm_mapping {
/* Pathname key; shm_remove() releases it with free(..., M_SHMFD). */
char *sm_path;
/* Hash of the pathname, compared before falling back to strcmp(). */
Fnv32_t sm_fnv;
/* Mapped object; shm_insert() stores the result of shm_hold() here. */
struct shmfd *sm_shmfd;
/* Linkage on the hash bucket list. */
LIST_ENTRY(shm_mapping) sm_link;
};
/* Malloc type used for shmfd structures, dictionary entries and the
 * path buffer allocated in shm_open(). */
static MALLOC_DEFINE(M_SHMFD, "shmfd", "shared memory file descriptor");
/* Array of hash buckets; allocated by hashinit() in shm_dict_init(). */
static LIST_HEAD(, shm_mapping) *shm_dictionary;
/* Taken exclusively around dictionary lookups, inserts and removals. */
static struct sx shm_dict_lock;
/* Taken around the timestamp updates in shm_dotruncate() and shm_mmap(). */
static struct mtx shm_timestamp_lock;
/* Mask filled in by hashinit(); SHM_HASH() ANDs it with the hash value. */
static u_long shm_hash;
/* Select the dictionary bucket for an FNV hash value. */
#define SHM_HASH(fnv) (&shm_dictionary[(fnv) & shm_hash])
/* Internal helpers: permission checks, object lifetime and dictionary. */
static int shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags);
static struct shmfd *shm_alloc(struct ucred *ucred, mode_t mode);
static void shm_dict_init(void *arg);
static void shm_drop(struct shmfd *shmfd);
static struct shmfd *shm_hold(struct shmfd *shmfd);
static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
static void shm_dotruncate(struct shmfd *shmfd, off_t length);
/* Handlers installed in the shm_ops file operations table. */
static fo_rdwr_t shm_read;
static fo_rdwr_t shm_write;
static fo_truncate_t shm_truncate;
static fo_ioctl_t shm_ioctl;
static fo_poll_t shm_poll;
static fo_kqfilter_t shm_kqfilter;
static fo_stat_t shm_stat;
static fo_close_t shm_close;
/*
 * File descriptor operations.
 *
 * Only truncate, stat and close do real work; the read, write, ioctl,
 * poll and kqfilter handlers in this file all return EOPNOTSUPP.
 * NOTE(review): DFLAG_PASSABLE presumably is what lets these descriptors
 * be sent over sockets with sendmsg(2) -- confirm against sys/file.h.
 */
static struct fileops shm_ops = {
.fo_read = shm_read,
.fo_write = shm_write,
.fo_truncate = shm_truncate,
.fo_ioctl = shm_ioctl,
.fo_poll = shm_poll,
.fo_kqfilter = shm_kqfilter,
.fo_stat = shm_stat,
.fo_close = shm_close,
.fo_flags = DFLAG_PASSABLE
};
/* Register the "posix_shm" feature string. */
FEATURE(posix_shm, "POSIX shared memory");
/*
 * fo_read handler.  Reading through a shared memory descriptor is not
 * implemented; every call fails with EOPNOTSUPP and no argument is
 * examined.
 */
static int
shm_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
/*
 * fo_write handler.  Writing through a shared memory descriptor is not
 * implemented; every call fails with EOPNOTSUPP and no argument is
 * examined.
 */
static int
shm_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{

	return (EOPNOTSUPP);
}
/*
 * fo_truncate handler: resize the shared memory object behind fp to
 * length bytes via shm_dotruncate().
 *
 * When MAC is compiled in, the request is first submitted to
 * mac_posixshm_check_truncate() and a non-zero result is returned to
 * the caller without touching the object.  Otherwise returns 0.
 */
static int
shm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *obj = fp->f_data;
#ifdef MAC
	int rc;

	rc = mac_posixshm_check_truncate(active_cred, fp->f_cred, obj);
	if (rc != 0)
		return (rc);
#endif
	shm_dotruncate(obj, length);
	return (0);
}
/*
 * fo_ioctl handler.  No ioctl commands are implemented for shared
 * memory descriptors; every call fails with EOPNOTSUPP.
 */
static int
shm_ioctl(struct file *fp, u_long com, void *data,
    struct ucred *active_cred, struct thread *td)
{

	return (EOPNOTSUPP);
}
/*
 * fo_poll handler.  Polling a shared memory descriptor is not
 * implemented; the handler returns EOPNOTSUPP regardless of the
 * requested events.
 */
static int
shm_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{

	return (EOPNOTSUPP);
}
/*
 * fo_kqfilter handler.  kevent filters cannot be attached to a shared
 * memory descriptor; every call fails with EOPNOTSUPP.
 */
static int
shm_kqfilter(struct file *fp, struct knote *kn)
{

	return (EOPNOTSUPP);
}
/*
 * fo_stat handler: fill *sb with a synthesized description of the
 * shared memory object behind fp.
 *
 * When MAC is compiled in, mac_posixshm_check_stat() is consulted first
 * and a non-zero result is returned with *sb untouched.  Otherwise the
 * structure is zeroed, populated from the shmfd and 0 is returned.
 */
static int
shm_stat(struct file *fp, struct stat *sb, struct ucred *active_cred,
    struct thread *td)
{
	struct shmfd *obj = fp->f_data;
#ifdef MAC
	int rc;

	rc = mac_posixshm_check_stat(active_cred, fp->f_cred, obj);
	if (rc != 0)
		return (rc);
#endif

	/* Start from all-zero so fields not set below read as 0. */
	bzero(sb, sizeof(*sb));

	/* Ownership and type: the object is presented as a regular file. */
	sb->st_uid = obj->shm_uid;
	sb->st_gid = obj->shm_gid;
	sb->st_mode = S_IFREG | obj->shm_mode;	/* XXX */

	/* Size, with the block count expressed in PAGE_SIZE units. */
	sb->st_size = obj->shm_size;
	sb->st_blksize = PAGE_SIZE;
	sb->st_blocks = (sb->st_size + sb->st_blksize - 1) / sb->st_blksize;

	/* Timestamps are copied straight from the shmfd. */
	sb->st_birthtimespec = obj->shm_birthtime;
	sb->st_atimespec = obj->shm_atime;
	sb->st_mtimespec = obj->shm_mtime;
	sb->st_ctimespec = obj->shm_ctime;

	return (0);
}
/*
 * fo_close handler: detach the shmfd from the file and give up the
 * reference the file held on it.  Always returns 0.
 */
static int
shm_close(struct file *fp, struct thread *td)
{
	struct shmfd *obj = fp->f_data;

	/* Clear the back pointer before the reference is released. */
	fp->f_data = NULL;
	shm_drop(obj);
	return (0);
}
/*
 * Set the size of a shared memory object to 'length' bytes.
 *
 * Nothing is done when the size is unchanged.  When shrinking, resident
 * pages and swap space past the new end are released and the tail of a
 * partially covered last page is zeroed.  In every other case the
 * shmfd's size and change/modification times and the VM object's size
 * are updated.  All of the work is done with the VM object locked.
 *
 * NOTE(review): 'length' is not range-checked here; callers are assumed
 * to pass a non-negative value -- confirm against the ftruncate(2) path.
 */
static void
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
vm_object_t object;
vm_page_t m;
vm_pindex_t nobjsize;
object = shmfd->shm_object;
VM_OBJECT_LOCK(object);
/* Same size as before: nothing to update, not even the timestamps. */
if (length == shmfd->shm_size) {
VM_OBJECT_UNLOCK(object);
return;
}
/* New object size; presumably in pages with 'length' rounded up. */
nobjsize = OFF_TO_IDX(length + PAGE_MASK);
/* Are we shrinking? If so, trim the end. */
if (length < shmfd->shm_size) {
/* Toss in memory pages. */
if (nobjsize < object->size)
vm_object_page_remove(object, nobjsize, object->size,
FALSE);
/* Toss pages from swap. */
if (object->type == OBJT_SWAP)
swap_pager_freespace(object, nobjsize,
object->size - nobjsize);
/*
* If the last page is partially mapped, then zero out
* the garbage at the end of the page. See comments
* in vnode_pager_setsize() for more details.
*
* XXXJHB: This handles in memory pages, but what about
* a page swapped out to disk?
*/
if ((length & PAGE_MASK) &&
(m = vm_page_lookup(object, OFF_TO_IDX(length))) != NULL &&
m->valid != 0) {
/* Byte range [base, PAGE_SIZE) lies beyond the new end. */
int base = (int)length & PAGE_MASK;
int size = PAGE_SIZE - base;
pmap_zero_page_area(m, base, size);
vm_page_lock_queues();
vm_page_set_validclean(m, base, size);
/* A page that was dirty at all is marked dirty in full. */
if (m->dirty != 0)
m->dirty = VM_PAGE_BITS_ALL;
vm_page_unlock_queues();
}
}
shmfd->shm_size = length;
/* Stamp ctime and copy it to mtime under the timestamp lock. */
mtx_lock(&shm_timestamp_lock);
vfs_timestamp(&shmfd->shm_ctime);
shmfd->shm_mtime = shmfd->shm_ctime;
mtx_unlock(&shm_timestamp_lock);
object->size = nobjsize;
VM_OBJECT_UNLOCK(object);
}
/*
* shmfd object management including creation and reference counting
* routines.
*/
/*
 * Allocate and initialise a new, empty shared memory object.
 *
 * The object starts with size zero, takes its owner and group from
 * ucred and its permission bits from mode, and gets a freshly allocated
 * VM object from vm_pager_allocate().  All four timestamps are set to
 * the creation time.  With MAC compiled in, the MAC label is
 * initialised and the create hook is invoked.
 *
 * Returns the new shmfd carrying a single reference, which the caller
 * owns.  The allocation uses M_WAITOK.
 */
static struct shmfd *
shm_alloc(struct ucred *ucred, mode_t mode)
{
	struct shmfd *shmfd;

	shmfd = malloc(sizeof(*shmfd), M_SHMFD, M_WAITOK | M_ZERO);
	shmfd->shm_size = 0;
	shmfd->shm_uid = ucred->cr_uid;
	shmfd->shm_gid = ucred->cr_gid;
	shmfd->shm_mode = mode;
	shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
	    shmfd->shm_size, VM_PROT_DEFAULT, 0);
	/* The assertion text names this function so a panic points here. */
	KASSERT(shmfd->shm_object != NULL, ("shm_alloc: vm_pager_allocate"));
	vfs_timestamp(&shmfd->shm_birthtime);
	shmfd->shm_atime = shmfd->shm_mtime = shmfd->shm_ctime =
	    shmfd->shm_birthtime;
	refcount_init(&shmfd->shm_refs, 1);
#ifdef MAC
	mac_posixshm_init(shmfd);
	mac_posixshm_create(ucred, shmfd);
#endif

	return (shmfd);
}
/*
 * Take an additional reference on shmfd and return the same pointer so
 * the call can be used inline in an assignment.
 */
static struct shmfd *
shm_hold(struct shmfd *shmfd)
{

	refcount_acquire(&shmfd->shm_refs);
	return (shmfd);
}
/*
 * Release one reference on shmfd.  When refcount_release() reports that
 * this was the last one, the MAC label (if MAC is compiled in), the
 * backing VM object reference and the structure itself are released.
 */
static void
shm_drop(struct shmfd *shmfd)
{

	/* Other references remain: nothing more to do. */
	if (!refcount_release(&shmfd->shm_refs))
		return;

#ifdef MAC
	mac_posixshm_destroy(shmfd);
#endif
	vm_object_deallocate(shmfd->shm_object);
	free(shmfd, M_SHMFD);
}
/*
 * Check whether ucred may access shmfd in the way described by flags.
 *
 * FREAD and FWRITE in flags are translated to VREAD and VWRITE and the
 * decision is delegated to vaccess(), using the object's mode, owner
 * and group and treating it as a regular file (VREG).  The value
 * returned by vaccess() is passed back unchanged.
 */
static int
shm_access(struct shmfd *shmfd, struct ucred *ucred, int flags)
{
	int wanted;

	wanted = (flags & FREAD) ? VREAD : 0;
	if ((flags & FWRITE) != 0)
		wanted |= VWRITE;

	return (vaccess(VREG, shmfd->shm_mode, shmfd->shm_uid, shmfd->shm_gid,
	    wanted, ucred, NULL));
}
/*
* Dictionary management. We maintain an in-kernel dictionary to map
* paths to shmfd objects. We use the FNV hash on the path to store
* the mappings in a hash table.
*/
/*
 * Boot-time initialisation, registered through SYSINIT below: allocate
 * the dictionary hash table (hashinit() also fills in the shm_hash
 * mask) and set up the dictionary and timestamp locks.  The argument is
 * unused.
 */
static void
shm_dict_init(void *arg)
{

	shm_dictionary = hashinit(1024, M_SHMFD, &shm_hash);
	sx_init(&shm_dict_lock, "shm dictionary");
	mtx_init(&shm_timestamp_lock, "shm timestamps", NULL, MTX_DEF);
}
SYSINIT(shm_dict_init, SI_SUB_SYSV_SHM, SI_ORDER_ANY, shm_dict_init, NULL);
/*
 * Find the shared memory object registered under path.
 *
 * Only the bucket selected by fnv is searched; the stored hash is
 * compared first so strcmp() runs only on entries whose hash matches.
 * Returns the mapped shmfd without taking a reference on it, or NULL
 * when no entry matches.
 */
static struct shmfd *
shm_lookup(char *path, Fnv32_t fnv)
{
	struct shm_mapping *entry;

	LIST_FOREACH(entry, SHM_HASH(fnv), sm_link) {
		if (entry->sm_fnv == fnv &&
		    strcmp(entry->sm_path, path) == 0)
			return (entry->sm_shmfd);
	}

	return (NULL);
}
/*
 * Register shmfd in the dictionary under path.
 *
 * The path pointer itself is stored in the new entry, not a copy, and
 * the entry takes its own reference on shmfd through shm_hold().  The
 * entry is placed at the head of the bucket selected by fnv.
 */
static void
shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd)
{
	struct shm_mapping *entry;

	entry = malloc(sizeof(*entry), M_SHMFD, M_WAITOK);
	entry->sm_shmfd = shm_hold(shmfd);
	entry->sm_fnv = fnv;
	entry->sm_path = path;
	LIST_INSERT_HEAD(SHM_HASH(fnv), entry, sm_link);
}
/*
 * Remove the dictionary entry for path on behalf of ucred.
 *
 * Returns ENOENT when no entry matches.  For a matching entry the MAC
 * unlink check (when MAC is compiled in) and then shm_access() with
 * FREAD | FWRITE must both succeed; otherwise their error is returned
 * and the entry is left in place.  On success the entry is unlinked,
 * its reference on the shmfd is dropped, the stored path and the entry
 * are freed, and 0 is returned.
 */
static int
shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred)
{
	struct shm_mapping *entry;
	int rc;

	/* Locate the entry; LIST_FOREACH leaves entry NULL if none match. */
	LIST_FOREACH(entry, SHM_HASH(fnv), sm_link) {
		if (entry->sm_fnv == fnv &&
		    strcmp(entry->sm_path, path) == 0)
			break;
	}
	if (entry == NULL)
		return (ENOENT);

#ifdef MAC
	rc = mac_posixshm_check_unlink(ucred, entry->sm_shmfd);
	if (rc != 0)
		return (rc);
#endif
	rc = shm_access(entry->sm_shmfd, ucred, FREAD | FWRITE);
	if (rc != 0)
		return (rc);

	LIST_REMOVE(entry, sm_link);
	shm_drop(entry->sm_shmfd);
	free(entry->sm_path, M_SHMFD);
	free(entry, M_SHMFD);
	return (0);
}
/* System calls. */
/*
 * shm_open(2): open, or optionally create, a shared memory object and
 * return a descriptor for it in td->td_retval[0].
 *
 * uap->flags must carry an access mode of O_RDONLY or O_RDWR and may
 * additionally carry only O_CREAT, O_EXCL and O_TRUNC; anything else
 * yields EINVAL.  uap->path is either SHM_ANON, which creates an unnamed
 * object, or a user-space pathname that must start with '/'.
 *
 * Returns 0 on success or an errno value; on every failure after
 * falloc() the descriptor and file allocated here are released again.
 */
int
shm_open(struct thread *td, struct shm_open_args *uap)
{
struct filedesc *fdp;
struct shmfd *shmfd;
struct file *fp;
char *path;
Fnv32_t fnv;
mode_t cmode;
int fd, error;
/* Only O_RDONLY and O_RDWR are acceptable access modes. */
if ((uap->flags & O_ACCMODE) != O_RDONLY &&
(uap->flags & O_ACCMODE) != O_RDWR)
return (EINVAL);
/* Reject any flag outside the supported set. */
if ((uap->flags & ~(O_ACCMODE | O_CREAT | O_EXCL | O_TRUNC)) != 0)
return (EINVAL);
fdp = td->td_proc->p_fd;
/* Creation mode: requested bits minus the umask, permission bits only. */
cmode = (uap->mode & ~fdp->fd_cmask) & ACCESSPERMS;
error = falloc(td, &fp, &fd);
if (error)
return (error);
/* A SHM_ANON path pointer creates an anonymous object. */
if (uap->path == SHM_ANON) {
/* A read-only anonymous object is pointless. */
if ((uap->flags & O_ACCMODE) == O_RDONLY) {
fdclose(fdp, fp, fd, td);
fdrop(fp, td);
return (EINVAL);
}
/* The single reference from shm_alloc() is handed to the file below. */
shmfd = shm_alloc(td->td_ucred, cmode);
} else {
path = malloc(MAXPATHLEN, M_SHMFD, M_WAITOK);
error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
/* Require paths to start with a '/' character. */
if (error == 0 && path[0] != '/')
error = EINVAL;
if (error) {
fdclose(fdp, fp, fd, td);
fdrop(fp, td);
free(path, M_SHMFD);
return (error);
}
fnv = fnv_32_str(path, FNV1_32_INIT);
/* Lookup and possible insert happen under the exclusive dict lock. */
sx_xlock(&shm_dict_lock);
shmfd = shm_lookup(path, fnv);
if (shmfd == NULL) {
/* Object does not yet exist, create it if requested. */
if (uap->flags & O_CREAT) {
shmfd = shm_alloc(td->td_ucred, cmode);
/*
 * The dictionary entry keeps the path buffer and takes its
 * own reference; the one from shm_alloc() goes to the file.
 */
shm_insert(path, fnv, shmfd);
} else {
free(path, M_SHMFD);
error = ENOENT;
}
} else {
/*
* Object already exists, obtain a new
* reference if requested and permitted.
*/
/* The existing entry has its own path; ours is not needed. */
free(path, M_SHMFD);
if ((uap->flags & (O_CREAT | O_EXCL)) ==
(O_CREAT | O_EXCL))
error = EEXIST;
else {
#ifdef MAC
error = mac_posixshm_check_open(td->td_ucred,
shmfd);
if (error == 0)
#endif
error = shm_access(shmfd, td->td_ucred,
FFLAGS(uap->flags & O_ACCMODE));
}
/*
* Truncate the file back to zero length if
* O_TRUNC was specified and the object was
* opened with read/write.
*/
if (error == 0 &&
(uap->flags & (O_ACCMODE | O_TRUNC)) ==
(O_RDWR | O_TRUNC)) {
#ifdef MAC
error = mac_posixshm_check_truncate(
td->td_ucred, fp->f_cred, shmfd);
if (error == 0)
#endif
shm_dotruncate(shmfd, 0);
}
/* Reference for the new file; taken only when the open succeeds. */
if (error == 0)
shm_hold(shmfd);
}
sx_xunlock(&shm_dict_lock);
if (error) {
fdclose(fdp, fp, fd, td);
fdrop(fp, td);
return (error);
}
}
/* Attach the object to the file with the requested access mode. */
finit(fp, FFLAGS(uap->flags & O_ACCMODE), DTYPE_SHM, shmfd, &shm_ops);
/* Mark the descriptor close-on-exec if the slot still holds our file. */
FILEDESC_XLOCK(fdp);
if (fdp->fd_ofiles[fd] == fp)
fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
FILEDESC_XUNLOCK(fdp);
td->td_retval[0] = fd;
/* NOTE(review): presumably drops the extra reference from falloc(). */
fdrop(fp, td);
return (0);
}
/*
 * shm_unlink(2): remove the name of a shared memory object.
 *
 * The pathname is copied in from user space into a temporary buffer,
 * hashed, and handed to shm_remove() under the exclusive dictionary
 * lock.  Returns the copyinstr() error if the copy fails, otherwise
 * whatever shm_remove() returns.  The temporary buffer is always freed.
 */
int
shm_unlink(struct thread *td, struct shm_unlink_args *uap)
{
	char *kpath;
	Fnv32_t hash;
	int rc;

	kpath = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	rc = copyinstr(uap->path, kpath, MAXPATHLEN, NULL);
	if (rc == 0) {
		hash = fnv_32_str(kpath, FNV1_32_INIT);
		sx_xlock(&shm_dict_lock);
		rc = shm_remove(kpath, hash, td->td_ucred);
		sx_xunlock(&shm_dict_lock);
	}
	free(kpath, M_TEMP);
	return (rc);
}
/*
 * mmap() helper to validate mmap() requests against shm object state
 * and give mmap() the vm_object to use for the mapping.
 *
 * shmfd   - shared memory descriptor being mapped.
 * objsize - length of the requested mapping.
 * foff    - starting offset of the mapping within the object.
 * obj     - on success, set to the backing VM object, on which an extra
 *           reference has been taken for the caller.
 *
 * Returns 0 on success, or EINVAL if the requested range does not lie
 * entirely within the object's current size.
 */
int
shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
    vm_object_t *obj)
{

	/*
	 * Reject negative offsets explicitly, then bounds-check in
	 * unsigned arithmetic as "objsize > size - foff".  Once foff is
	 * known to be below shm_size the subtraction cannot underflow,
	 * and unlike "foff + objsize" it cannot wrap around and let an
	 * out-of-range request through.
	 */
	if (foff < 0 ||
	    (unsigned long long)foff >= shmfd->shm_size ||
	    objsize > shmfd->shm_size - (size_t)foff)
		return (EINVAL);

	/* Record the mapping as an access to the object. */
	mtx_lock(&shm_timestamp_lock);
	vfs_timestamp(&shmfd->shm_atime);
	mtx_unlock(&shm_timestamp_lock);

	/* The reference taken here is handed to the caller through *obj. */
	vm_object_reference(shmfd->shm_object);
	*obj = shmfd->shm_object;
	return (0);
}

View File

@ -71,6 +71,7 @@ struct msg;
struct msqid_kernel;
struct proc;
struct semid_kernel;
struct shmfd;
struct shmid_kernel;
struct sockaddr;
struct socket;
@ -198,6 +199,18 @@ void mac_posixsem_create(struct ucred *cred, struct ksem *ks);
void mac_posixsem_destroy(struct ksem *);
void mac_posixsem_init(struct ksem *);
int mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
int prot, int flags);
int mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd);
int mac_posixshm_check_stat(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd);
int mac_posixshm_check_truncate(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd);
int mac_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd);
void mac_posixshm_create(struct ucred *cred, struct shmfd *shmfd);
void mac_posixshm_destroy(struct shmfd *);
void mac_posixshm_init(struct shmfd *);
int mac_priv_check(struct ucred *cred, int priv);
int mac_priv_grant(struct ucred *cred, int priv);

View File

@ -83,6 +83,7 @@ struct pipepair;
struct proc;
struct sbuf;
struct semid_kernel;
struct shmfd;
struct shmid_kernel;
struct sockaddr;
struct socket;
@ -305,6 +306,24 @@ typedef void (*mpo_posixsem_create_t)(struct ucred *cred,
typedef void (*mpo_posixsem_destroy_label_t)(struct label *label);
typedef void (*mpo_posixsem_init_label_t)(struct label *label);
typedef int (*mpo_posixshm_check_mmap_t)(struct ucred *cred,
struct shmfd *shmfd, struct label *shmlabel, int prot,
int flags);
typedef int (*mpo_posixshm_check_open_t)(struct ucred *cred,
struct shmfd *shmfd, struct label *shmlabel);
typedef int (*mpo_posixshm_check_stat_t)(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd,
struct label *shmlabel);
typedef int (*mpo_posixshm_check_truncate_t)(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd,
struct label *shmlabel);
typedef int (*mpo_posixshm_check_unlink_t)(struct ucred *cred,
struct shmfd *shmfd, struct label *shmlabel);
typedef void (*mpo_posixshm_create_t)(struct ucred *cred,
struct shmfd *shmfd, struct label *shmlabel);
typedef void (*mpo_posixshm_destroy_label_t)(struct label *label);
typedef void (*mpo_posixshm_init_label_t)(struct label *label);
typedef int (*mpo_priv_check_t)(struct ucred *cred, int priv);
typedef int (*mpo_priv_grant_t)(struct ucred *cred, int priv);
@ -733,6 +752,15 @@ struct mac_policy_ops {
mpo_posixsem_destroy_label_t mpo_posixsem_destroy_label;
mpo_posixsem_init_label_t mpo_posixsem_init_label;
mpo_posixshm_check_mmap_t mpo_posixshm_check_mmap;
mpo_posixshm_check_open_t mpo_posixshm_check_open;
mpo_posixshm_check_stat_t mpo_posixshm_check_stat;
mpo_posixshm_check_truncate_t mpo_posixshm_check_truncate;
mpo_posixshm_check_unlink_t mpo_posixshm_check_unlink;
mpo_posixshm_create_t mpo_posixshm_create;
mpo_posixshm_destroy_label_t mpo_posixshm_destroy_label;
mpo_posixshm_init_label_t mpo_posixshm_init_label;
mpo_priv_check_t mpo_priv_check;
mpo_priv_grant_t mpo_priv_grant;

View File

@ -0,0 +1,146 @@
/*-
* Copyright (c) 2003-2006 SPARTA, Inc.
* All rights reserved.
*
* This software was developed for the FreeBSD Project in part by Network
* Associates Laboratories, the Security Research Division of Network
* Associates, Inc. under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"),
* as part of the DARPA CHATS research program.
*
* This software was enhanced by SPARTA ISSO under SPAWAR contract
* N66001-04-C-6019 ("SEFOS").
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mac.h"
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mman.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <security/mac/mac_framework.h>
#include <security/mac/mac_internal.h>
#include <security/mac/mac_policy.h>
/*
 * Allocate a MAC label for a POSIX shared memory object.
 *
 * The label is taken from the label zone with M_WAITOK and then handed to
 * the posixshm_init_label entry point through MAC_PERFORM before being
 * returned to the caller.
 */
static struct label *
mac_posixshm_label_alloc(void)
{
	struct label *shmlabel = mac_labelzone_alloc(M_WAITOK);

	MAC_PERFORM(posixshm_init_label, shmlabel);
	return (shmlabel);
}
/*
 * Attach a newly allocated MAC label to a shared memory descriptor.
 */
void
mac_posixshm_init(struct shmfd *shmfd)
{
/* The label has already been through the posixshm_init_label hook. */
shmfd->shm_label = mac_posixshm_label_alloc();
}
/*
 * Release a shared memory MAC label: invoke the posixshm_destroy_label
 * entry point through MAC_PERFORM first, then return the label to the
 * label zone.
 */
static void
mac_posixshm_label_free(struct label *label)
{
MAC_PERFORM(posixshm_destroy_label, label);
mac_labelzone_free(label);
}
/*
 * Tear down the MAC label attached to a shared memory descriptor.
 */
void
mac_posixshm_destroy(struct shmfd *shmfd)
{
mac_posixshm_label_free(shmfd->shm_label);
/* Clear the pointer so the freed label is not left dangling. */
shmfd->shm_label = NULL;
}
/*
 * Invoke the posixshm_create entry point through MAC_PERFORM, passing the
 * given credential, the descriptor, and the descriptor's label.
 */
void
mac_posixshm_create(struct ucred *cred, struct shmfd *shmfd)
{
MAC_PERFORM(posixshm_create, cred, shmfd, shmfd->shm_label);
}
/*
 * MAC access check for mapping a shared memory object with the given
 * protection and mmap flags.  Returns the value MAC_CHECK leaves in
 * 'error'.
 */
int
mac_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd, int prot,
int flags)
{
/* Never assigned explicitly here; MAC_CHECK is expected to set it. */
int error;
MAC_CHECK(posixshm_check_mmap, cred, shmfd, shmfd->shm_label, prot,
flags);
return (error);
}
/*
 * MAC access check for opening an existing shared memory object.
 * Returns the value MAC_CHECK leaves in 'error'.
 */
int
mac_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd)
{
/* Never assigned explicitly here; MAC_CHECK is expected to set it. */
int error;
MAC_CHECK(posixshm_check_open, cred, shmfd, shmfd->shm_label);
return (error);
}
/*
 * MAC access check for the posixshm_check_stat entry point.  Both the
 * active credential and the file credential are passed through to the
 * policies.  Returns the value MAC_CHECK leaves in 'error'.
 */
int
mac_posixshm_check_stat(struct ucred *active_cred, struct ucred *file_cred,
struct shmfd *shmfd)
{
/* Never assigned explicitly here; MAC_CHECK is expected to set it. */
int error;
MAC_CHECK(posixshm_check_stat, active_cred, file_cred, shmfd,
shmfd->shm_label);
return (error);
}
/*
 * MAC access check for truncating a shared memory object.  Both the
 * active credential and the file credential are passed through to the
 * policies.  Returns the value MAC_CHECK leaves in 'error'.
 */
int
mac_posixshm_check_truncate(struct ucred *active_cred, struct ucred *file_cred,
struct shmfd *shmfd)
{
/* Never assigned explicitly here; MAC_CHECK is expected to set it. */
int error;
MAC_CHECK(posixshm_check_truncate, active_cred, file_cred, shmfd,
shmfd->shm_label);
return (error);
}
/*
 * MAC access check for unlinking a shared memory object.
 * Returns the value MAC_CHECK leaves in 'error'.
 */
int
mac_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd)
{
/* Never assigned explicitly here; MAC_CHECK is expected to set it. */
int error;
MAC_CHECK(posixshm_check_unlink, cred, shmfd, shmfd->shm_label);
return (error);
}

View File

@ -577,6 +577,53 @@ stub_posixsem_create(struct ucred *cred, struct ksem *ks,
}
/*
 * Stub for the mpo_posixshm_check_mmap entry point: ignores every
 * argument and always returns 0.
 */
static int
stub_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
struct label *shmlabel, int prot, int flags)
{
return (0);
}
/*
 * Stub for the mpo_posixshm_check_open entry point: ignores every
 * argument and always returns 0.
 */
static int
stub_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd,
struct label *shmlabel)
{
return (0);
}
/*
 * Stub for the mpo_posixshm_check_stat entry point: ignores every
 * argument and always returns 0.
 */
static int
stub_posixshm_check_stat(struct ucred *active_cred, struct ucred *file_cred,
struct shmfd *shmfd, struct label *shmlabel)
{
return (0);
}
/*
 * Stub for the mpo_posixshm_check_truncate entry point: ignores every
 * argument and always returns 0.
 */
static int
stub_posixshm_check_truncate(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd, struct label *shmlabel)
{
return (0);
}
/*
 * Stub for the mpo_posixshm_check_unlink entry point: ignores every
 * argument and always returns 0.
 */
static int
stub_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd,
struct label *shmlabel)
{
return (0);
}
/*
 * Stub for the mpo_posixshm_create entry point: intentionally does
 * nothing.
 */
static void
stub_posixshm_create(struct ucred *cred, struct shmfd *shmfd,
struct label *shmlabel)
{
}
static int
stub_priv_check(struct ucred *cred, int priv)
{
@ -1550,6 +1597,15 @@ static struct mac_policy_ops stub_ops =
.mpo_posixsem_destroy_label = stub_destroy_label,
.mpo_posixsem_init_label = stub_init_label,
.mpo_posixshm_check_mmap = stub_posixshm_check_mmap,
.mpo_posixshm_check_open = stub_posixshm_check_open,
.mpo_posixshm_check_stat = stub_posixshm_check_stat,
.mpo_posixshm_check_truncate = stub_posixshm_check_truncate,
.mpo_posixshm_check_unlink = stub_posixshm_check_unlink,
.mpo_posixshm_create = stub_posixshm_create,
.mpo_posixshm_destroy_label = stub_destroy_label,
.mpo_posixshm_init_label = stub_init_label,
.mpo_priv_check = stub_priv_check,
.mpo_priv_grant = stub_priv_grant,

View File

@ -94,6 +94,7 @@ SYSCTL_NODE(_security_mac, OID_AUTO, test, CTLFLAG_RW, 0,
#define MAGIC_SYSV_SHM 0x76119ab0
#define MAGIC_PIPE 0xdc6c9919
#define MAGIC_POSIX_SEM 0x78ae980c
#define MAGIC_POSIX_SHM 0x4e853fc9
#define MAGIC_PROC 0x3b4be98f
#define MAGIC_CRED 0x9a5a4987
#define MAGIC_VNODE 0x1a67a45c
@ -1116,6 +1117,92 @@ test_posixsem_init_label(struct label *label)
COUNTER_INC(posixsem_init_label);
}
COUNTER_DECL(posixshm_check_mmap);
static int
test_posixshm_check_mmap(struct ucred *cred, struct shmfd *shmfd,
struct label *shmfdlabel, int prot, int flags)
{
LABEL_CHECK(cred->cr_label, MAGIC_CRED);
LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
return (0);
}
COUNTER_DECL(posixshm_check_open);
static int
test_posixshm_check_open(struct ucred *cred, struct shmfd *shmfd,
struct label *shmfdlabel)
{
LABEL_CHECK(cred->cr_label, MAGIC_CRED);
LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
return (0);
}
COUNTER_DECL(posixshm_check_stat);
static int
test_posixshm_check_stat(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd, struct label *shmfdlabel)
{
LABEL_CHECK(active_cred->cr_label, MAGIC_CRED);
LABEL_CHECK(file_cred->cr_label, MAGIC_CRED);
LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
return (0);
}
COUNTER_DECL(posixshm_check_truncate);
static int
test_posixshm_check_truncate(struct ucred *active_cred,
struct ucred *file_cred, struct shmfd *shmfd, struct label *shmfdlabel)
{
LABEL_CHECK(active_cred->cr_label, MAGIC_CRED);
LABEL_CHECK(file_cred->cr_label, MAGIC_CRED);
LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
return (0);
}
COUNTER_DECL(posixshm_check_unlink);
static int
test_posixshm_check_unlink(struct ucred *cred, struct shmfd *shmfd,
struct label *shmfdlabel)
{
LABEL_CHECK(cred->cr_label, MAGIC_CRED);
LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
return (0);
}
COUNTER_DECL(posixshm_create);
/*
 * mac_test hook for posixshm_create: runs LABEL_CHECK on the credential
 * label and the shared memory label, then increments the posixshm_create
 * counter.
 */
static void
test_posixshm_create(struct ucred *cred, struct shmfd *shmfd,
struct label *shmfdlabel)
{
/* NOTE(review): LABEL_CHECK presumably validates the label's magic value; confirm against its definition. */
LABEL_CHECK(cred->cr_label, MAGIC_CRED);
LABEL_CHECK(shmfdlabel, MAGIC_POSIX_SHM);
COUNTER_INC(posixshm_create);
}
COUNTER_DECL(posixshm_destroy_label);
/*
 * mac_test hook for posixshm_destroy_label: applies LABEL_DESTROY with
 * the POSIX shm magic to the label, then increments the
 * posixshm_destroy_label counter.
 */
static void
test_posixshm_destroy_label(struct label *label)
{
LABEL_DESTROY(label, MAGIC_POSIX_SHM);
COUNTER_INC(posixshm_destroy_label);
}
COUNTER_DECL(posixshm_init_label);
/*
 * mac_test hook for posixshm_init_label: applies LABEL_INIT with the
 * POSIX shm magic to the label, then increments the posixshm_init_label
 * counter.
 */
static void
test_posixshm_init_label(struct label *label)
{
LABEL_INIT(label, MAGIC_POSIX_SHM);
COUNTER_INC(posixshm_init_label);
}
COUNTER_DECL(proc_check_debug);
static int
test_proc_check_debug(struct ucred *cred, struct proc *p)
@ -2809,6 +2896,15 @@ static struct mac_policy_ops test_ops =
.mpo_posixsem_destroy_label = test_posixsem_destroy_label,
.mpo_posixsem_init_label = test_posixsem_init_label,
.mpo_posixshm_check_mmap = test_posixshm_check_mmap,
.mpo_posixshm_check_open = test_posixshm_check_open,
.mpo_posixshm_check_stat = test_posixshm_check_stat,
.mpo_posixshm_check_truncate = test_posixshm_check_truncate,
.mpo_posixshm_check_unlink = test_posixshm_check_unlink,
.mpo_posixshm_create = test_posixshm_create,
.mpo_posixshm_destroy_label = test_posixshm_destroy_label,
.mpo_posixshm_init_label = test_posixshm_init_label,
.mpo_proc_check_debug = test_proc_check_debug,
.mpo_proc_check_sched = test_proc_check_sched,
.mpo_proc_check_setaudit = test_proc_check_setaudit,

View File

@ -126,8 +126,20 @@ typedef __pid_t pid_t;
/* bits to save after open */
#define FMASK (FREAD|FWRITE|FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT)
/* bits settable by fcntl(F_SETFL, ...) */
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|O_DIRECT)
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
/*
 * Set by shm_open(3) in older versions of libc to get automatic
 * MAP_NOSYNC behavior for POSIX shared memory objects (which are
 * otherwise implemented as plain files).
 */
#define FPOSIXSHM O_NOFOLLOW
#undef FCNTLFLAGS
#define FCNTLFLAGS (FAPPEND|FASYNC|FFSYNC|FNONBLOCK|FPOSIXSHM|O_DIRECT)
#endif
#endif
/*
* The O_* flags used to have only F* names, which were used in the kernel
@ -150,13 +162,6 @@ typedef __pid_t pid_t;
* different meaning for fcntl(2).
*/
#if __BSD_VISIBLE
/*
* Set by shm_open(3) to get automatic MAP_ASYNC behavior
* for POSIX shared memory objects (which are otherwise
* implemented as plain files).
*/
#define FPOSIXSHM O_NOFOLLOW
#endif
/*

View File

@ -59,6 +59,7 @@ struct socket;
#define DTYPE_KQUEUE 5 /* event queue */
#define DTYPE_CRYPTO 6 /* crypto */
#define DTYPE_MQUEUE 7 /* posix message queue */
#define DTYPE_SHM 8 /* swap-backed shared memory */
#ifdef _KERNEL

View File

@ -139,6 +139,11 @@
#define MINCORE_MODIFIED 0x4 /* Page has been modified by us */
#define MINCORE_REFERENCED_OTHER 0x8 /* Page has been referenced */
#define MINCORE_MODIFIED_OTHER 0x10 /* Page has been modified */
/*
* Anonymous object constant for shm_open().
*/
#define SHM_ANON ((char *)1)
#endif /* __BSD_VISIBLE */
/*
@ -168,7 +173,33 @@ typedef __size_t size_t;
#define _SIZE_T_DECLARED
#endif
#ifndef _KERNEL
#ifdef _KERNEL
#include <vm/vm.h>
/*
 * Kernel state for one POSIX shared memory object, as referenced by a
 * DTYPE_SHM file descriptor.
 */
struct shmfd {
/* Current object size; shm_mmap() bounds-checks mappings against it. */
size_t shm_size;
/* Backing VM object handed out to mmap() by shm_mmap(). */
vm_object_t shm_object;
/* NOTE(review): presumably the reference count on this structure; confirm in uipc_shm.c. */
int shm_refs;
/* NOTE(review): presumably owner, group and permission bits used for access checks; confirm. */
uid_t shm_uid;
gid_t shm_gid;
mode_t shm_mode;
/*
* Values maintained solely to make this a better-behaved file
* descriptor for fstat() to run on.
*/
/* shm_atime is refreshed by shm_mmap() under shm_timestamp_lock. */
struct timespec shm_atime;
struct timespec shm_mtime;
struct timespec shm_ctime;
struct timespec shm_birthtime;
struct label *shm_label; /* MAC label */
};
int shm_mmap(struct shmfd *shmfd, vm_size_t objsize, vm_ooffset_t foff,
vm_object_t *obj);
#else /* !_KERNEL */
__BEGIN_DECLS
/*

View File

@ -118,6 +118,8 @@ static int vm_mmap_vnode(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
int *, struct vnode *, vm_ooffset_t, vm_object_t *);
static int vm_mmap_cdev(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
int *, struct cdev *, vm_ooffset_t, vm_object_t *);
static int vm_mmap_shm(struct thread *, vm_size_t, vm_prot_t, vm_prot_t *,
int *, struct shmfd *, vm_ooffset_t, vm_object_t *);
/*
* MPSAFE
@ -300,16 +302,29 @@ mmap(td, uap)
pos = 0;
} else {
/*
* Mapping file, get fp for validation. Obtain vnode and make
* sure it is of appropriate type.
* don't let the descriptor disappear on us if we block
* Mapping file, get fp for validation and
* don't let the descriptor disappear on us if we block.
*/
if ((error = fget(td, uap->fd, &fp)) != 0)
goto done;
if (fp->f_type == DTYPE_SHM) {
handle = fp->f_data;
handle_type = OBJT_SWAP;
maxprot = VM_PROT_NONE;
/* FREAD should always be set. */
if (fp->f_flag & FREAD)
maxprot |= VM_PROT_EXECUTE | VM_PROT_READ;
if (fp->f_flag & FWRITE)
maxprot |= VM_PROT_WRITE;
goto map;
}
if (fp->f_type != DTYPE_VNODE) {
error = ENODEV;
goto done;
}
#if defined(COMPAT_FREEBSD7) || defined(COMPAT_FREEBSD6) || \
defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4)
/*
* POSIX shared-memory objects are defined to have
* kernel persistence, and are not defined to support
@ -320,6 +335,7 @@ mmap(td, uap)
*/
if (fp->f_flag & FPOSIXSHM)
flags |= MAP_NOSYNC;
#endif
vp = fp->f_vnode;
/*
* Ensure that file and memory protections are
@ -360,6 +376,7 @@ mmap(td, uap)
handle = (void *)vp;
handle_type = OBJT_VNODE;
}
map:
/*
* Do not allow more than a certain number of vm_map_entry structures
@ -1290,6 +1307,35 @@ vm_mmap_cdev(struct thread *td, vm_size_t objsize,
return (0);
}
/*
 * vm_mmap_shm()
 *
 * MPSAFE
 *
 * vm_mmap() helper for shared memory file descriptors.  Refuses a
 * writable mapping when the maximum protection does not include write
 * access, consults the MAC framework when it is compiled in, and then
 * lets shm_mmap() validate the range and supply the VM object through
 * objp.
 *
 * Returns 0 on success, EACCES for a disallowed write mapping, or the
 * error reported by the MAC check or by shm_mmap().
 */
int
vm_mmap_shm(struct thread *td, vm_size_t objsize,
    vm_prot_t prot, vm_prot_t *maxprotp, int *flagsp,
    struct shmfd *shmfd, vm_ooffset_t foff, vm_object_t *objp)
{
	int error;

	/* Write access was requested but is not permitted. */
	if ((prot & PROT_WRITE) != 0 && (*maxprotp & VM_PROT_WRITE) == 0)
		return (EACCES);
#ifdef MAC
	error = mac_posixshm_check_mmap(td->td_ucred, shmfd, prot, *flagsp);
	if (error != 0)
		return (error);
#endif
	/* shm_mmap()'s result, success or failure, is our result. */
	error = shm_mmap(shmfd, objsize, foff, objp);
	return (error);
}
/*
* vm_mmap()
*
@ -1354,6 +1400,10 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
error = vm_mmap_vnode(td, size, prot, &maxprot, &flags,
handle, foff, &object);
break;
case OBJT_SWAP:
error = vm_mmap_shm(td, size, prot, &maxprot, &flags,
handle, foff, &object);
break;
case OBJT_DEFAULT:
if (handle == NULL) {
error = 0;