2609222ab4
- Capability is no longer separate descriptor type. Now every descriptor has set of its own capability rights. - The cap_new(2) system call is left, but it is no longer documented and should not be used in new code. - The new syscall cap_rights_limit(2) should be used instead of cap_new(2), which limits capability rights of the given descriptor without creating a new one. - The cap_getrights(2) syscall is renamed to cap_rights_get(2). - If CAP_IOCTL capability right is present we can further reduce allowed ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed ioctls can be retrieved with cap_ioctls_get(2) syscall. - If CAP_FCNTL capability right is present we can further reduce fcntls that can be used with the new cap_fcntls_limit(2) syscall and retrieve them with cap_fcntls_get(2). - To support ioctl and fcntl white-listing the filedesc structure was heavily modified. - The audit subsystem, kdump and procstat tools were updated to recognize new syscalls. - Capability rights were revised and even though I tried hard to provide backward API and ABI compatibility there are some incompatible changes that are described in detail below: CAP_CREATE old behaviour: - Allow for openat(2)+O_CREAT. - Allow for linkat(2). - Allow for symlinkat(2). CAP_CREATE new behaviour: - Allow for openat(2)+O_CREAT. Added CAP_LINKAT: - Allow for linkat(2). ABI: Reuses CAP_RMDIR bit. - Allow to be target for renameat(2). Added CAP_SYMLINKAT: - Allow for symlinkat(2). Removed CAP_DELETE. Old behaviour: - Allow for unlinkat(2) when removing non-directory object. - Allow to be source for renameat(2). Removed CAP_RMDIR. Old behaviour: - Allow for unlinkat(2) when removing directory. Added CAP_RENAMEAT: - Required for source directory for the renameat(2) syscall. Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR): - Allow for unlinkat(2) on any object. - Required if target of renameat(2) exists and will be removed by this call. Removed CAP_MAPEXEC. 
CAP_MMAP old behaviour: - Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and PROT_WRITE. CAP_MMAP new behaviour: - Allow for mmap(2)+PROT_NONE. Added CAP_MMAP_R: - Allow for mmap(PROT_READ). Added CAP_MMAP_W: - Allow for mmap(PROT_WRITE). Added CAP_MMAP_X: - Allow for mmap(PROT_EXEC). Added CAP_MMAP_RW: - Allow for mmap(PROT_READ | PROT_WRITE). Added CAP_MMAP_RX: - Allow for mmap(PROT_READ | PROT_EXEC). Added CAP_MMAP_WX: - Allow for mmap(PROT_WRITE | PROT_EXEC). Added CAP_MMAP_RWX: - Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC). Renamed CAP_MKDIR to CAP_MKDIRAT. Renamed CAP_MKFIFO to CAP_MKFIFOAT. Renamed CAP_MKNOD to CAP_MKNODAT. CAP_READ old behaviour: - Allow pread(2). - Disallow read(2), readv(2) (if there is no CAP_SEEK). CAP_READ new behaviour: - Allow read(2), readv(2). - Disallow pread(2) (CAP_SEEK was also required). CAP_WRITE old behaviour: - Allow pwrite(2). - Disallow write(2), writev(2) (if there is no CAP_SEEK). CAP_WRITE new behaviour: - Allow write(2), writev(2). - Disallow pwrite(2) (CAP_SEEK was also required). 
Added convenient defines: #define CAP_PREAD (CAP_SEEK | CAP_READ) #define CAP_PWRITE (CAP_SEEK | CAP_WRITE) #define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ) #define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE) #define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL) #define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W) #define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X) #define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X) #define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X) #define CAP_RECV CAP_READ #define CAP_SEND CAP_WRITE #define CAP_SOCK_CLIENT \ (CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \ CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN) #define CAP_SOCK_SERVER \ (CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \ CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \ CAP_SETSOCKOPT | CAP_SHUTDOWN) Added defines for backward API compatibility: #define CAP_MAPEXEC CAP_MMAP_X #define CAP_DELETE CAP_UNLINKAT #define CAP_MKDIR CAP_MKDIRAT #define CAP_RMDIR CAP_UNLINKAT #define CAP_MKFIFO CAP_MKFIFOAT #define CAP_MKNOD CAP_MKNODAT #define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER) Sponsored by: The FreeBSD Foundation Reviewed by: Christoph Mallon <christoph.mallon@gmx.de> Many aspects discussed with: rwatson, benl, jonathan ABI compatibility discussed with: kib
241 lines
6.0 KiB
C
241 lines
6.0 KiB
C
/*-
|
|
* Copyright (c) 1992, 1993, 1995
|
|
* The Regents of the University of California. All rights reserved.
|
|
*
|
|
* This code is derived from software donated to Berkeley by
|
|
* Jan-Simon Pendry.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* @(#)fdesc_vfsops.c 8.4 (Berkeley) 1/21/94
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
/*
|
|
* /dev/fd Filesystem
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/filedesc.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mount.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/racct.h>
|
|
#include <sys/resourcevar.h>
|
|
#include <sys/vnode.h>
|
|
|
|
#include <fs/fdescfs/fdesc.h>
|
|
|
|
static MALLOC_DEFINE(M_FDESCMNT, "fdesc_mount", "FDESC mount structure");
|
|
|
|
static vfs_cmount_t fdesc_cmount;
|
|
static vfs_mount_t fdesc_mount;
|
|
static vfs_unmount_t fdesc_unmount;
|
|
static vfs_statfs_t fdesc_statfs;
|
|
static vfs_root_t fdesc_root;
|
|
|
|
/*
|
|
* Compatibility shim for old mount(2) system call.
|
|
*/
|
|
int
|
|
fdesc_cmount(struct mntarg *ma, void *data, uint64_t flags)
|
|
{
|
|
return kernel_mount(ma, flags);
|
|
}
|
|
|
|
/*
|
|
* Mount the per-process file descriptors (/dev/fd)
|
|
*/
|
|
static int
|
|
fdesc_mount(struct mount *mp)
|
|
{
|
|
int error = 0;
|
|
struct fdescmount *fmp;
|
|
struct vnode *rvp;
|
|
|
|
/*
|
|
* Update is a no-op
|
|
*/
|
|
if (mp->mnt_flag & (MNT_UPDATE | MNT_ROOTFS))
|
|
return (EOPNOTSUPP);
|
|
|
|
fmp = malloc(sizeof(struct fdescmount),
|
|
M_FDESCMNT, M_WAITOK); /* XXX */
|
|
|
|
/*
|
|
* We need to initialize a few bits of our local mount point struct to
|
|
* avoid confusion in allocvp.
|
|
*/
|
|
mp->mnt_data = (qaddr_t) fmp;
|
|
fmp->flags = 0;
|
|
error = fdesc_allocvp(Froot, -1, FD_ROOT, mp, &rvp);
|
|
if (error) {
|
|
free(fmp, M_FDESCMNT);
|
|
mp->mnt_data = NULL;
|
|
return (error);
|
|
}
|
|
rvp->v_type = VDIR;
|
|
rvp->v_vflag |= VV_ROOT;
|
|
fmp->f_root = rvp;
|
|
VOP_UNLOCK(rvp, 0);
|
|
/* XXX -- don't mark as local to work around fts() problems */
|
|
/*mp->mnt_flag |= MNT_LOCAL;*/
|
|
vfs_getnewfsid(mp);
|
|
|
|
vfs_mountedfrom(mp, "fdescfs");
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
fdesc_unmount(mp, mntflags)
|
|
struct mount *mp;
|
|
int mntflags;
|
|
{
|
|
struct fdescmount *fmp;
|
|
caddr_t data;
|
|
int error;
|
|
int flags = 0;
|
|
|
|
fmp = (struct fdescmount *)mp->mnt_data;
|
|
if (mntflags & MNT_FORCE) {
|
|
/* The hash mutex protects the private mount flags. */
|
|
mtx_lock(&fdesc_hashmtx);
|
|
fmp->flags |= FMNT_UNMOUNTF;
|
|
mtx_unlock(&fdesc_hashmtx);
|
|
flags |= FORCECLOSE;
|
|
}
|
|
|
|
/*
|
|
* Clear out buffer cache. I don't think we
|
|
* ever get anything cached at this level at the
|
|
* moment, but who knows...
|
|
*
|
|
* There is 1 extra root vnode reference corresponding
|
|
* to f_root.
|
|
*/
|
|
if ((error = vflush(mp, 1, flags, curthread)) != 0)
|
|
return (error);
|
|
|
|
/*
|
|
* Finally, throw away the fdescmount structure. Hold the hashmtx to
|
|
* protect the fdescmount structure.
|
|
*/
|
|
mtx_lock(&fdesc_hashmtx);
|
|
data = mp->mnt_data;
|
|
mp->mnt_data = NULL;
|
|
mtx_unlock(&fdesc_hashmtx);
|
|
free(data, M_FDESCMNT); /* XXX */
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
fdesc_root(mp, flags, vpp)
|
|
struct mount *mp;
|
|
int flags;
|
|
struct vnode **vpp;
|
|
{
|
|
struct vnode *vp;
|
|
|
|
/*
|
|
* Return locked reference to root.
|
|
*/
|
|
vp = VFSTOFDESC(mp)->f_root;
|
|
vget(vp, LK_EXCLUSIVE | LK_RETRY, curthread);
|
|
*vpp = vp;
|
|
return (0);
|
|
}
|
|
|
|
static int
|
|
fdesc_statfs(mp, sbp)
|
|
struct mount *mp;
|
|
struct statfs *sbp;
|
|
{
|
|
struct thread *td;
|
|
struct filedesc *fdp;
|
|
int lim;
|
|
int i;
|
|
int last;
|
|
int freefd;
|
|
uint64_t limit;
|
|
|
|
td = curthread;
|
|
|
|
/*
|
|
* Compute number of free file descriptors.
|
|
* [ Strange results will ensue if the open file
|
|
* limit is ever reduced below the current number
|
|
* of open files... ]
|
|
*/
|
|
PROC_LOCK(td->td_proc);
|
|
lim = lim_cur(td->td_proc, RLIMIT_NOFILE);
|
|
PROC_UNLOCK(td->td_proc);
|
|
fdp = td->td_proc->p_fd;
|
|
FILEDESC_SLOCK(fdp);
|
|
limit = racct_get_limit(td->td_proc, RACCT_NOFILE);
|
|
if (lim > limit)
|
|
lim = limit;
|
|
last = min(fdp->fd_nfiles, lim);
|
|
freefd = 0;
|
|
for (i = fdp->fd_freefile; i < last; i++)
|
|
if (fdp->fd_ofiles[i].fde_file == NULL)
|
|
freefd++;
|
|
|
|
/*
|
|
* Adjust for the fact that the fdesc array may not
|
|
* have been fully allocated yet.
|
|
*/
|
|
if (fdp->fd_nfiles < lim)
|
|
freefd += (lim - fdp->fd_nfiles);
|
|
FILEDESC_SUNLOCK(fdp);
|
|
|
|
sbp->f_flags = 0;
|
|
sbp->f_bsize = DEV_BSIZE;
|
|
sbp->f_iosize = DEV_BSIZE;
|
|
sbp->f_blocks = 2; /* 1K to keep df happy */
|
|
sbp->f_bfree = 0;
|
|
sbp->f_bavail = 0;
|
|
sbp->f_files = lim + 1; /* Allow for "." */
|
|
sbp->f_ffree = freefd; /* See comments above */
|
|
return (0);
|
|
}
|
|
|
|
static struct vfsops fdesc_vfsops = {
|
|
.vfs_cmount = fdesc_cmount,
|
|
.vfs_init = fdesc_init,
|
|
.vfs_mount = fdesc_mount,
|
|
.vfs_root = fdesc_root,
|
|
.vfs_statfs = fdesc_statfs,
|
|
.vfs_uninit = fdesc_uninit,
|
|
.vfs_unmount = fdesc_unmount,
|
|
};
|
|
|
|
VFS_SET(fdesc_vfsops, fdescfs, VFCF_SYNTHETIC);
|