2005-01-06 23:35:40 +00:00
|
|
|
/*-
|
1994-05-24 10:09:53 +00:00
|
|
|
* Copyright (c) 1982, 1986, 1989, 1990, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2016-09-15 13:16:20 +00:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1994-05-24 10:09:53 +00:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
|
|
|
* @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
|
|
|
|
*/
|
|
|
|
|
2003-06-11 00:56:59 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
2011-06-30 10:56:02 +00:00
|
|
|
#include "opt_capsicum.h"
|
2009-06-10 14:36:59 +00:00
|
|
|
#include "opt_inet.h"
|
|
|
|
#include "opt_inet6.h"
|
1997-12-16 17:40:42 +00:00
|
|
|
#include "opt_compat.h"
|
1996-01-03 21:42:35 +00:00
|
|
|
#include "opt_ktrace.h"
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/param.h>
|
1994-05-25 09:21:21 +00:00
|
|
|
#include <sys/systm.h>
|
2014-03-16 10:55:57 +00:00
|
|
|
#include <sys/capsicum.h>
|
1998-11-05 14:28:26 +00:00
|
|
|
#include <sys/kernel.h>
|
2001-05-01 08:13:21 +00:00
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/mutex.h>
|
1995-10-23 15:42:12 +00:00
|
|
|
#include <sys/sysproto.h>
|
1997-10-12 20:26:33 +00:00
|
|
|
#include <sys/malloc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/filedesc.h>
|
|
|
|
#include <sys/proc.h>
|
2003-02-23 23:00:28 +00:00
|
|
|
#include <sys/filio.h>
|
2009-09-19 14:02:16 +00:00
|
|
|
#include <sys/jail.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/mbuf.h>
|
|
|
|
#include <sys/protosw.h>
|
2013-03-09 02:32:23 +00:00
|
|
|
#include <sys/rwlock.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/socket.h>
|
|
|
|
#include <sys/socketvar.h>
|
2003-02-03 17:36:52 +00:00
|
|
|
#include <sys/syscallsubr.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
#include <sys/ktrace.h>
|
|
|
|
#endif
|
2010-03-19 10:46:54 +00:00
|
|
|
#ifdef COMPAT_FREEBSD32
|
|
|
|
#include <compat/freebsd32/freebsd32_util.h>
|
|
|
|
#endif
|
2001-05-01 08:13:21 +00:00
|
|
|
|
2009-08-01 19:26:27 +00:00
|
|
|
#include <net/vnet.h>
|
|
|
|
|
2009-07-01 18:54:49 +00:00
|
|
|
#include <security/audit/audit.h>
|
2006-10-22 11:52:19 +00:00
|
|
|
#include <security/mac/mac_framework.h>
|
|
|
|
|
2002-03-19 21:25:46 +00:00
|
|
|
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
|
2002-06-28 23:48:23 +00:00
|
|
|
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
|
2004-01-11 19:56:42 +00:00
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
static int accept1(struct thread *td, int s, struct sockaddr *uname,
|
|
|
|
socklen_t *anamelen, int flags);
|
2002-03-19 21:25:46 +00:00
|
|
|
static int getsockname1(struct thread *td, struct getsockname_args *uap,
|
2002-03-24 05:09:11 +00:00
|
|
|
int compat);
|
2002-03-19 21:25:46 +00:00
|
|
|
static int getpeername1(struct thread *td, struct getpeername_args *uap,
|
2002-03-24 05:09:11 +00:00
|
|
|
int compat);
|
2016-05-18 22:05:50 +00:00
|
|
|
static int sockargs(struct mbuf **, char *, socklen_t, int);
|
1995-10-23 15:42:12 +00:00
|
|
|
|
2004-10-24 23:45:01 +00:00
|
|
|
/*
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrived with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrive
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavly modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and eventhough I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convinient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
* Convert a user file descriptor to a kernel file entry and check if required
|
|
|
|
* capability rights are present.
|
2016-09-22 09:58:46 +00:00
|
|
|
* If required copy of current set of capability rights is returned.
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrived with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrive
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavly modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and eventhough I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convinient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
* A reference on the file entry is held upon returning.
|
2004-10-24 23:45:01 +00:00
|
|
|
*/
|
2014-10-09 15:16:52 +00:00
|
|
|
int
|
2015-04-11 16:00:33 +00:00
|
|
|
getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp,
|
2016-09-22 09:58:46 +00:00
|
|
|
struct file **fpp, u_int *fflagp, struct filecaps *havecapsp)
|
2004-10-24 23:45:01 +00:00
|
|
|
{
|
|
|
|
struct file *fp;
|
|
|
|
int error;
|
|
|
|
|
2016-09-22 09:58:46 +00:00
|
|
|
error = fget_cap(td, fd, rightsp, &fp, havecapsp);
|
Merge Capsicum overhaul:
- Capability is no longer separate descriptor type. Now every descriptor
has set of its own capability rights.
- The cap_new(2) system call is left, but it is no longer documented and
should not be used in new code.
- The new syscall cap_rights_limit(2) should be used instead of
cap_new(2), which limits capability rights of the given descriptor
without creating a new one.
- The cap_getrights(2) syscall is renamed to cap_rights_get(2).
- If CAP_IOCTL capability right is present we can further reduce allowed
ioctls list with the new cap_ioctls_limit(2) syscall. List of allowed
ioctls can be retrived with cap_ioctls_get(2) syscall.
- If CAP_FCNTL capability right is present we can further reduce fcntls
that can be used with the new cap_fcntls_limit(2) syscall and retrive
them with cap_fcntls_get(2).
- To support ioctl and fcntl white-listing the filedesc structure was
heavly modified.
- The audit subsystem, kdump and procstat tools were updated to
recognize new syscalls.
- Capability rights were revised and eventhough I tried hard to provide
backward API and ABI compatibility there are some incompatible changes
that are described in detail below:
CAP_CREATE old behaviour:
- Allow for openat(2)+O_CREAT.
- Allow for linkat(2).
- Allow for symlinkat(2).
CAP_CREATE new behaviour:
- Allow for openat(2)+O_CREAT.
Added CAP_LINKAT:
- Allow for linkat(2). ABI: Reuses CAP_RMDIR bit.
- Allow to be target for renameat(2).
Added CAP_SYMLINKAT:
- Allow for symlinkat(2).
Removed CAP_DELETE. Old behaviour:
- Allow for unlinkat(2) when removing non-directory object.
- Allow to be source for renameat(2).
Removed CAP_RMDIR. Old behaviour:
- Allow for unlinkat(2) when removing directory.
Added CAP_RENAMEAT:
- Required for source directory for the renameat(2) syscall.
Added CAP_UNLINKAT (effectively it replaces CAP_DELETE and CAP_RMDIR):
- Allow for unlinkat(2) on any object.
- Required if target of renameat(2) exists and will be removed by this
call.
Removed CAP_MAPEXEC.
CAP_MMAP old behaviour:
- Allow for mmap(2) with any combination of PROT_NONE, PROT_READ and
PROT_WRITE.
CAP_MMAP new behaviour:
- Allow for mmap(2)+PROT_NONE.
Added CAP_MMAP_R:
- Allow for mmap(PROT_READ).
Added CAP_MMAP_W:
- Allow for mmap(PROT_WRITE).
Added CAP_MMAP_X:
- Allow for mmap(PROT_EXEC).
Added CAP_MMAP_RW:
- Allow for mmap(PROT_READ | PROT_WRITE).
Added CAP_MMAP_RX:
- Allow for mmap(PROT_READ | PROT_EXEC).
Added CAP_MMAP_WX:
- Allow for mmap(PROT_WRITE | PROT_EXEC).
Added CAP_MMAP_RWX:
- Allow for mmap(PROT_READ | PROT_WRITE | PROT_EXEC).
Renamed CAP_MKDIR to CAP_MKDIRAT.
Renamed CAP_MKFIFO to CAP_MKFIFOAT.
Renamed CAP_MKNODE to CAP_MKNODEAT.
CAP_READ old behaviour:
- Allow pread(2).
- Disallow read(2), readv(2) (if there is no CAP_SEEK).
CAP_READ new behaviour:
- Allow read(2), readv(2).
- Disallow pread(2) (CAP_SEEK was also required).
CAP_WRITE old behaviour:
- Allow pwrite(2).
- Disallow write(2), writev(2) (if there is no CAP_SEEK).
CAP_WRITE new behaviour:
- Allow write(2), writev(2).
- Disallow pwrite(2) (CAP_SEEK was also required).
Added convinient defines:
#define CAP_PREAD (CAP_SEEK | CAP_READ)
#define CAP_PWRITE (CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_R (CAP_MMAP | CAP_SEEK | CAP_READ)
#define CAP_MMAP_W (CAP_MMAP | CAP_SEEK | CAP_WRITE)
#define CAP_MMAP_X (CAP_MMAP | CAP_SEEK | 0x0000000000000008ULL)
#define CAP_MMAP_RW (CAP_MMAP_R | CAP_MMAP_W)
#define CAP_MMAP_RX (CAP_MMAP_R | CAP_MMAP_X)
#define CAP_MMAP_WX (CAP_MMAP_W | CAP_MMAP_X)
#define CAP_MMAP_RWX (CAP_MMAP_R | CAP_MMAP_W | CAP_MMAP_X)
#define CAP_RECV CAP_READ
#define CAP_SEND CAP_WRITE
#define CAP_SOCK_CLIENT \
(CAP_CONNECT | CAP_GETPEERNAME | CAP_GETSOCKNAME | CAP_GETSOCKOPT | \
CAP_PEELOFF | CAP_RECV | CAP_SEND | CAP_SETSOCKOPT | CAP_SHUTDOWN)
#define CAP_SOCK_SERVER \
(CAP_ACCEPT | CAP_BIND | CAP_GETPEERNAME | CAP_GETSOCKNAME | \
CAP_GETSOCKOPT | CAP_LISTEN | CAP_PEELOFF | CAP_RECV | CAP_SEND | \
CAP_SETSOCKOPT | CAP_SHUTDOWN)
Added defines for backward API compatibility:
#define CAP_MAPEXEC CAP_MMAP_X
#define CAP_DELETE CAP_UNLINKAT
#define CAP_MKDIR CAP_MKDIRAT
#define CAP_RMDIR CAP_UNLINKAT
#define CAP_MKFIFO CAP_MKFIFOAT
#define CAP_MKNOD CAP_MKNODAT
#define CAP_SOCK_ALL (CAP_SOCK_CLIENT | CAP_SOCK_SERVER)
Sponsored by: The FreeBSD Foundation
Reviewed by: Christoph Mallon <christoph.mallon@gmx.de>
Many aspects discussed with: rwatson, benl, jonathan
ABI compatibility discussed with: kib
2013-03-02 00:53:12 +00:00
|
|
|
if (error != 0)
|
2011-08-11 12:30:23 +00:00
|
|
|
return (error);
|
|
|
|
if (fp->f_type != DTYPE_SOCKET) {
|
2015-04-11 16:00:33 +00:00
|
|
|
fdrop(fp, td);
|
2016-09-22 09:58:46 +00:00
|
|
|
if (havecapsp != NULL)
|
|
|
|
filecaps_free(havecapsp);
|
2011-08-11 12:30:23 +00:00
|
|
|
return (ENOTSOCK);
|
|
|
|
}
|
|
|
|
if (fflagp != NULL)
|
|
|
|
*fflagp = fp->f_flag;
|
2004-10-24 23:45:01 +00:00
|
|
|
*fpp = fp;
|
2011-08-11 12:30:23 +00:00
|
|
|
return (0);
|
2004-10-24 23:45:01 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* System call interface to the socket abstraction.
|
|
|
|
*/
|
2004-06-11 11:16:26 +00:00
|
|
|
#if defined(COMPAT_43)
|
1994-05-24 10:09:53 +00:00
|
|
|
#define COMPAT_OLDSOCK
|
|
|
|
#endif
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_socket(struct thread *td, struct socket_args *uap)
|
2017-01-30 12:57:22 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
return (kern_socket(td, uap->domain, uap->type, uap->protocol));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_socket(struct thread *td, int domain, int type, int protocol)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct socket *so;
|
|
|
|
struct file *fp;
|
2017-01-30 12:57:22 +00:00
|
|
|
int fd, error, oflag, fflag;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2017-01-30 12:57:22 +00:00
|
|
|
AUDIT_ARG_SOCKET(domain, type, protocol);
|
2013-03-19 20:58:17 +00:00
|
|
|
|
|
|
|
oflag = 0;
|
|
|
|
fflag = 0;
|
|
|
|
if ((type & SOCK_CLOEXEC) != 0) {
|
|
|
|
type &= ~SOCK_CLOEXEC;
|
|
|
|
oflag |= O_CLOEXEC;
|
|
|
|
}
|
|
|
|
if ((type & SOCK_NONBLOCK) != 0) {
|
|
|
|
type &= ~SOCK_NONBLOCK;
|
|
|
|
fflag |= FNONBLOCK;
|
|
|
|
}
|
|
|
|
|
2005-07-05 22:49:10 +00:00
|
|
|
#ifdef MAC
|
2017-01-30 12:57:22 +00:00
|
|
|
error = mac_socket_check_create(td->td_ucred, domain, type, protocol);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2005-07-05 22:49:10 +00:00
|
|
|
return (error);
|
|
|
|
#endif
|
2013-03-19 20:58:17 +00:00
|
|
|
error = falloc(td, &fp, &fd, oflag);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-03-04 01:57:48 +00:00
|
|
|
return (error);
|
2003-10-19 20:41:07 +00:00
|
|
|
/* An extra reference on `fp' has been held for us by falloc(). */
|
2017-01-30 12:57:22 +00:00
|
|
|
error = socreate(domain, &so, type, protocol, td->td_ucred, td);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2015-04-11 15:40:28 +00:00
|
|
|
fdclose(td, fp, fd);
|
1994-05-24 10:09:53 +00:00
|
|
|
} else {
|
2013-03-19 20:58:17 +00:00
|
|
|
finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops);
|
|
|
|
if ((fflag & FNONBLOCK) != 0)
|
|
|
|
(void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td);
|
2001-09-12 08:38:13 +00:00
|
|
|
td->td_retval[0] = fd;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2001-09-12 08:38:13 +00:00
|
|
|
fdrop(fp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_bind(struct thread *td, struct bind_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
1997-08-16 19:16:27 +00:00
|
|
|
struct sockaddr *sa;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2013-03-02 21:11:30 +00:00
|
|
|
error = getsockaddr(&sa, uap->name, uap->namelen);
|
|
|
|
if (error == 0) {
|
2014-11-13 18:01:51 +00:00
|
|
|
error = kern_bindat(td, AT_FDCWD, uap->s, sa);
|
2013-03-02 21:11:30 +00:00
|
|
|
free(sa, M_SONAME);
|
|
|
|
}
|
2006-07-19 18:28:52 +00:00
|
|
|
return (error);
|
2003-02-03 17:36:52 +00:00
|
|
|
}
|
|
|
|
|
2014-11-13 18:01:51 +00:00
|
|
|
int
|
2013-03-02 21:11:30 +00:00
|
|
|
kern_bindat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
|
2003-02-03 17:36:52 +00:00
|
|
|
{
|
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2003-02-03 17:36:52 +00:00
|
|
|
int error;
|
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(fd);
|
2013-03-02 21:11:30 +00:00
|
|
|
AUDIT_ARG_SOCKADDR(td, dirfd, sa);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_BIND),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2004-10-24 23:45:01 +00:00
|
|
|
so = fp->f_data;
|
2008-02-23 01:01:49 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(sa);
|
|
|
|
#endif
|
2002-07-31 16:39:49 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_socket_check_bind(td->td_ucred, so, sa);
|
2013-03-02 21:11:30 +00:00
|
|
|
if (error == 0) {
|
|
|
|
#endif
|
|
|
|
if (dirfd == AT_FDCWD)
|
|
|
|
error = sobind(so, sa, td);
|
|
|
|
else
|
|
|
|
error = sobindat(dirfd, so, sa, td);
|
|
|
|
#ifdef MAC
|
|
|
|
}
|
2003-02-03 17:36:52 +00:00
|
|
|
#endif
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2013-03-02 21:11:30 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_bindat(struct thread *td, struct bindat_args *uap)
|
2013-03-02 21:11:30 +00:00
|
|
|
{
|
|
|
|
struct sockaddr *sa;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = getsockaddr(&sa, uap->name, uap->namelen);
|
|
|
|
if (error == 0) {
|
|
|
|
error = kern_bindat(td, uap->fd, uap->s, sa);
|
|
|
|
free(sa, M_SONAME);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_listen(struct thread *td, struct listen_args *uap)
|
2017-01-30 12:57:22 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
return (kern_listen(td, uap->s, uap->backlog));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_listen(struct thread *td, int s, int backlog)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2002-01-09 02:47:00 +00:00
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2017-01-30 12:57:22 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
|
|
|
error = getsock_cap(td, s, cap_rights_init(&rights, CAP_LISTEN),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2004-10-24 23:45:01 +00:00
|
|
|
if (error == 0) {
|
|
|
|
so = fp->f_data;
|
2002-07-31 16:39:49 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_socket_check_listen(td->td_ucred, so);
|
2011-02-16 21:29:13 +00:00
|
|
|
if (error == 0)
|
2002-07-31 16:39:49 +00:00
|
|
|
#endif
|
2017-01-30 12:57:22 +00:00
|
|
|
error = solisten(so, backlog, td);
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
}
|
2017-01-30 12:57:22 +00:00
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
/*
|
|
|
|
* accept1()
|
|
|
|
*/
|
1995-10-07 23:47:26 +00:00
|
|
|
static int
|
2013-05-01 20:10:21 +00:00
|
|
|
accept1(td, s, uname, anamelen, flags)
|
2001-09-12 08:38:13 +00:00
|
|
|
struct thread *td;
|
2013-05-01 20:10:21 +00:00
|
|
|
int s;
|
|
|
|
struct sockaddr *uname;
|
|
|
|
socklen_t *anamelen;
|
|
|
|
int flags;
|
2006-07-10 21:38:17 +00:00
|
|
|
{
|
|
|
|
struct sockaddr *name;
|
|
|
|
socklen_t namelen;
|
2006-07-27 19:54:41 +00:00
|
|
|
struct file *fp;
|
2006-07-10 21:38:17 +00:00
|
|
|
int error;
|
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
if (uname == NULL)
|
|
|
|
return (kern_accept4(td, s, NULL, NULL, flags, NULL));
|
2006-07-10 21:38:17 +00:00
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
error = copyin(anamelen, &namelen, sizeof (namelen));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
error = kern_accept4(td, s, &name, &namelen, flags, &fp);
|
2006-07-10 21:38:17 +00:00
|
|
|
|
2014-05-11 21:21:14 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
if (error == 0 && uname != NULL) {
|
2006-07-10 21:38:17 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
2013-05-01 20:10:21 +00:00
|
|
|
if (flags & ACCEPT4_COMPAT)
|
2006-07-10 21:38:17 +00:00
|
|
|
((struct osockaddr *)name)->sa_family =
|
|
|
|
name->sa_family;
|
|
|
|
#endif
|
2013-05-01 20:10:21 +00:00
|
|
|
error = copyout(name, uname, namelen);
|
2006-07-10 21:38:17 +00:00
|
|
|
}
|
|
|
|
if (error == 0)
|
2013-05-01 20:10:21 +00:00
|
|
|
error = copyout(&namelen, anamelen,
|
2006-07-10 21:38:17 +00:00
|
|
|
sizeof(namelen));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2015-04-11 15:40:28 +00:00
|
|
|
fdclose(td, fp, td->td_retval[0]);
|
2006-07-27 19:54:41 +00:00
|
|
|
fdrop(fp, td);
|
2006-07-10 21:38:17 +00:00
|
|
|
free(name, M_SONAME);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_accept(struct thread *td, int s, struct sockaddr **name,
|
2006-07-27 19:54:41 +00:00
|
|
|
socklen_t *namelen, struct file **fp)
|
2013-05-01 20:10:21 +00:00
|
|
|
{
|
|
|
|
return (kern_accept4(td, s, name, namelen, ACCEPT4_INHERIT, fp));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_accept4(struct thread *td, int s, struct sockaddr **name,
|
|
|
|
socklen_t *namelen, int flags, struct file **fp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2006-04-25 11:48:16 +00:00
|
|
|
struct file *headfp, *nfp = NULL;
|
Correct a resource leak introduced in recent accept locking changes:
when I reordered events in accept1() to allocate a file descriptor
earlier, I didn't properly update use of goto on exit to unwind for
cases where the file descriptor is now held, but wasn't previously.
The result was that, in the event of accept() on a non-blocking socket,
or in the event of a socket error, a file descriptor would be leaked.
This ended up being non-fatal in many cases, as the file descriptor
would be properly GC'd on process exit, so only showed up for processes
that do a lot of non-blocking accept() calls, and also live for a long
time (such as qmail).
This change updates the use of goto targets to do additional unwinding.
Eyes provided by: Brian Feldman <green@freebsd.org>
Feet, hands provided by: Stefan Ehmann <shoesoft@gmx.net>,
Dimitry Andric <dimitry@andric.com>
Arjan van Leeuwen <avleeuwen@piwebs.com>
2004-06-07 21:45:44 +00:00
|
|
|
struct sockaddr *sa = NULL;
|
1996-03-11 15:37:44 +00:00
|
|
|
struct socket *head, *so;
|
2016-09-22 09:58:46 +00:00
|
|
|
struct filecaps fcaps;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2001-11-17 03:07:11 +00:00
|
|
|
u_int fflag;
|
2002-10-03 02:13:00 +00:00
|
|
|
pid_t pgid;
|
2013-09-05 00:17:38 +00:00
|
|
|
int error, fd, tmp;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2013-09-05 00:17:38 +00:00
|
|
|
if (name != NULL)
|
2006-07-10 21:38:17 +00:00
|
|
|
*name = NULL;
|
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, s, cap_rights_init(&rights, CAP_ACCEPT),
|
2016-09-22 09:58:46 +00:00
|
|
|
&headfp, &fflag, &fcaps);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2006-04-25 11:48:16 +00:00
|
|
|
head = headfp->f_data;
|
1996-03-11 15:37:44 +00:00
|
|
|
if ((head->so_options & SO_ACCEPTCONN) == 0) {
|
2000-11-18 21:01:04 +00:00
|
|
|
error = EINVAL;
|
|
|
|
goto done;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2005-04-16 18:46:29 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_socket_check_accept(td->td_ucred, head);
|
2005-04-16 18:46:29 +00:00
|
|
|
if (error != 0)
|
|
|
|
goto done;
|
|
|
|
#endif
|
2016-09-22 09:58:46 +00:00
|
|
|
error = falloc_caps(td, &nfp, &fd,
|
|
|
|
(flags & SOCK_CLOEXEC) ? O_CLOEXEC : 0, &fcaps);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
Integrate accept locking from rwatson_netperf, introducing a new
global mutex, accept_mtx, which serializes access to the following
fields across all sockets:
so_qlen so_incqlen so_qstate
so_comp so_incomp so_list
so_head
While providing only coarse granularity, this approach avoids lock
order issues between sockets by avoiding ownership of the fields
by a specific socket and its per-socket mutexes.
While here, rewrite soclose(), sofree(), soaccept(), and
sonewconn() to add assertions, close additional races and address
lock order concerns. In particular:
- Reorganize the optimistic concurrency behavior in accept1() to
always allocate a file descriptor with falloc() so that if we do
find a socket, we don't have to encounter the "Oh, there wasn't
a socket" race that can occur if falloc() sleeps in the current
code, which broke inbound accept() ordering, not to mention
requiring backing out socket state changes in a way that raced
with the protocol level. We may want to add a lockless read of
the queue state if polling of empty queues proves to be important
to optimize.
- In accept1(), soref() the socket while holding the accept lock
so that the socket cannot be free'd in a race with the protocol
layer. Likewise in netgraph equivilents of the accept1() code.
- In sonewconn(), loop waiting for the queue to be small enough to
insert our new socket once we've committed to inserting it, or
races can occur that cause the incomplete socket queue to
overfill. In the previously implementation, it was sufficient
to simply tested once since calling soabort() didn't release
synchronization permitting another thread to insert a socket as
we discard a previous one.
- In soclose()/sofree()/et al, it is the responsibility of the
caller to remove a socket from the incomplete connection queue
before calling soabort(), which prevents soabort() from having
to walk into the accept socket to release the socket from its
queue, and avoids races when releasing the accept mutex to enter
soabort(), permitting soabort() to avoid lock ordering issues
with the caller.
- Generally cluster accept queue related operations together
throughout these functions in order to facilitate locking.
Annotate new locking in socketvar.h.
2004-06-02 04:15:39 +00:00
|
|
|
goto done;
|
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from
fields that belong to normal dataflow, and unionize them. This
shrinks the structure a bit.
- Take out selinfo's from the socket buffers into the socket. The
first reason is to support braindamaged scenario when a socket is
added to kevent(2) and then listen(2) is cast on it. The second
reason is that there is future plan to make socket buffers pluggable,
so that for a dataflow socket a socket buffer can be changed, and
in this case we also want to keep same selinfos through the lifetime
of a socket.
- Remove struct struct so_accf. Since now listening stuff no longer
affects struct socket size, just move its fields into listening part
of the union.
- Provide sol_upcall field and enforce that so_upcall_set() may be called
only on a dataflow socket, which has buffers, and for listening sockets
provide solisten_upcall_set().
o Remove ACCEPT_LOCK() global.
- Add a mutex to socket, to be used instead of socket buffer lock to lock
fields of struct socket that don't belong to a socket buffer.
- Allow to acquire two socket locks, but the first one must belong to a
listening socket.
- Make soref()/sorele() to use atomic(9). This allows in some situations
to do soref() without owning socket lock. There is place for improvement
here, it is possible to make sorele() also to lock optionally.
- Most protocols aren't touched by this change, except UNIX local sockets.
See below for more information.
o Reduce copy-and-paste in kernel modules that accept connections from
listening sockets: provide function solisten_dequeue(), and use it in
the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4),
infiniband, rpc.
o UNIX local sockets.
- Removal of ACCEPT_LOCK() global uncovered several races in the UNIX
local sockets. Most races exist around spawning a new socket, when we
are connecting to a local listening socket. To cover them, we need to
hold locks on both PCBs when spawning a third one. This means holding
them across sonewconn(). This creates a LOR between pcb locks and
unp_list_lock.
- To fix the new LOR, abandon the global unp_list_lock in favor of global
unp_link_lock. Indeed, separating these two locks didn't provide us any
extra parralelism in the UNIX sockets.
- Now call into uipc_attach() may happen with unp_link_lock hold if, we
are accepting, or without unp_link_lock in case if we are just creating
a socket.
- Another problem in UNIX sockets is that uipc_close() basicly did nothing
for a listening socket. The vnode remained opened for connections. This
is fixed by removing vnode in uipc_close(). Maybe the right way would be
to do it for all sockets (not only listening), simply move the vnode
teardown from uipc_detach() to uipc_close()?
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D9770
2017-06-08 21:30:34 +00:00
|
|
|
SOCK_LOCK(head);
|
|
|
|
if (!SOLISTENING(head)) {
|
|
|
|
SOCK_UNLOCK(head);
|
|
|
|
error = EINVAL;
|
Correct a resource leak introduced in recent accept locking changes:
when I reordered events in accept1() to allocate a file descriptor
earlier, I didn't properly update use of goto on exit to unwind for
cases where the file descriptor is now held, but wasn't previously.
The result was that, in the event of accept() on a non-blocking socket,
or in the event of a socket error, a file descriptor would be leaked.
This ended up being non-fatal in many cases, as the file descriptor
would be properly GC'd on process exit, so only showed up for processes
that do a lot of non-blocking accept() calls, and also live for a long
time (such as qmail).
This change updates the use of goto targets to do additional unwinding.
Eyes provided by: Brian Feldman <green@freebsd.org>
Feet, hands provided by: Stefan Ehmann <shoesoft@gmx.net>,
Dimitry Andric <dimitry@andric.com>
Arjan van Leeuwen <avleeuwen@piwebs.com>
2004-06-07 21:45:44 +00:00
|
|
|
goto noconnection;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
Integrate accept locking from rwatson_netperf, introducing a new
global mutex, accept_mtx, which serializes access to the following
fields across all sockets:
so_qlen so_incqlen so_qstate
so_comp so_incomp so_list
so_head
While providing only coarse granularity, this approach avoids lock
order issues between sockets by avoiding ownership of the fields
by a specific socket and its per-socket mutexes.
While here, rewrite soclose(), sofree(), soaccept(), and
sonewconn() to add assertions, close additional races and address
lock order concerns. In particular:
- Reorganize the optimistic concurrency behavior in accept1() to
always allocate a file descriptor with falloc() so that if we do
find a socket, we don't have to encounter the "Oh, there wasn't
a socket" race that can occur if falloc() sleeps in the current
code, which broke inbound accept() ordering, not to mention
requiring backing out socket state changes in a way that raced
with the protocol level. We may want to add a lockless read of
the queue state if polling of empty queues proves to be important
to optimize.
- In accept1(), soref() the socket while holding the accept lock
so that the socket cannot be free'd in a race with the protocol
layer. Likewise in netgraph equivilents of the accept1() code.
- In sonewconn(), loop waiting for the queue to be small enough to
insert our new socket once we've committed to inserting it, or
races can occur that cause the incomplete socket queue to
overfill. In the previously implementation, it was sufficient
to simply tested once since calling soabort() didn't release
synchronization permitting another thread to insert a socket as
we discard a previous one.
- In soclose()/sofree()/et al, it is the responsibility of the
caller to remove a socket from the incomplete connection queue
before calling soabort(), which prevents soabort() from having
to walk into the accept socket to release the socket from its
queue, and avoids races when releasing the accept mutex to enter
soabort(), permitting soabort() to avoid lock ordering issues
with the caller.
- Generally cluster accept queue related operations together
throughout these functions in order to facilitate locking.
Annotate new locking in socketvar.h.
2004-06-02 04:15:39 +00:00
|
|
|
|
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from
fields that belong to normal dataflow, and unionize them. This
shrinks the structure a bit.
- Take out selinfo's from the socket buffers into the socket. The
first reason is to support braindamaged scenario when a socket is
added to kevent(2) and then listen(2) is cast on it. The second
reason is that there is future plan to make socket buffers pluggable,
so that for a dataflow socket a socket buffer can be changed, and
in this case we also want to keep same selinfos through the lifetime
of a socket.
- Remove struct struct so_accf. Since now listening stuff no longer
affects struct socket size, just move its fields into listening part
of the union.
- Provide sol_upcall field and enforce that so_upcall_set() may be called
only on a dataflow socket, which has buffers, and for listening sockets
provide solisten_upcall_set().
o Remove ACCEPT_LOCK() global.
- Add a mutex to socket, to be used instead of socket buffer lock to lock
fields of struct socket that don't belong to a socket buffer.
- Allow to acquire two socket locks, but the first one must belong to a
listening socket.
- Make soref()/sorele() to use atomic(9). This allows in some situations
to do soref() without owning socket lock. There is place for improvement
here, it is possible to make sorele() also to lock optionally.
- Most protocols aren't touched by this change, except UNIX local sockets.
See below for more information.
o Reduce copy-and-paste in kernel modules that accept connections from
listening sockets: provide function solisten_dequeue(), and use it in
the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4),
infiniband, rpc.
o UNIX local sockets.
- Removal of ACCEPT_LOCK() global uncovered several races in the UNIX
local sockets. Most races exist around spawning a new socket, when we
are connecting to a local listening socket. To cover them, we need to
hold locks on both PCBs when spawning a third one. This means holding
them across sonewconn(). This creates a LOR between pcb locks and
unp_list_lock.
- To fix the new LOR, abandon the global unp_list_lock in favor of global
unp_link_lock. Indeed, separating these two locks didn't provide us any
extra parralelism in the UNIX sockets.
- Now call into uipc_attach() may happen with unp_link_lock hold if, we
are accepting, or without unp_link_lock in case if we are just creating
a socket.
- Another problem in UNIX sockets is that uipc_close() basicly did nothing
for a listening socket. The vnode remained opened for connections. This
is fixed by removing vnode in uipc_close(). Maybe the right way would be
to do it for all sockets (not only listening), simply move the vnode
teardown from uipc_detach() to uipc_close()?
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D9770
2017-06-08 21:30:34 +00:00
|
|
|
error = solisten_dequeue(head, &so, flags);
|
|
|
|
if (error != 0)
|
|
|
|
goto noconnection;
|
1997-03-31 12:30:01 +00:00
|
|
|
|
2003-10-19 20:41:07 +00:00
|
|
|
/* An extra reference on `nfp' has been held for us by falloc(). */
|
2001-09-12 08:38:13 +00:00
|
|
|
td->td_retval[0] = fd;
|
1996-03-11 15:37:44 +00:00
|
|
|
|
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from
fields that belong to normal dataflow, and unionize them. This
shrinks the structure a bit.
- Take out selinfo's from the socket buffers into the socket. The
first reason is to support braindamaged scenario when a socket is
added to kevent(2) and then listen(2) is cast on it. The second
reason is that there is future plan to make socket buffers pluggable,
so that for a dataflow socket a socket buffer can be changed, and
in this case we also want to keep same selinfos through the lifetime
of a socket.
- Remove struct struct so_accf. Since now listening stuff no longer
affects struct socket size, just move its fields into listening part
of the union.
- Provide sol_upcall field and enforce that so_upcall_set() may be called
only on a dataflow socket, which has buffers, and for listening sockets
provide solisten_upcall_set().
o Remove ACCEPT_LOCK() global.
- Add a mutex to socket, to be used instead of socket buffer lock to lock
fields of struct socket that don't belong to a socket buffer.
- Allow to acquire two socket locks, but the first one must belong to a
listening socket.
- Make soref()/sorele() to use atomic(9). This allows in some situations
to do soref() without owning socket lock. There is place for improvement
here, it is possible to make sorele() also to lock optionally.
- Most protocols aren't touched by this change, except UNIX local sockets.
See below for more information.
o Reduce copy-and-paste in kernel modules that accept connections from
listening sockets: provide function solisten_dequeue(), and use it in
the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4),
infiniband, rpc.
o UNIX local sockets.
- Removal of ACCEPT_LOCK() global uncovered several races in the UNIX
local sockets. Most races exist around spawning a new socket, when we
are connecting to a local listening socket. To cover them, we need to
hold locks on both PCBs when spawning a third one. This means holding
them across sonewconn(). This creates a LOR between pcb locks and
unp_list_lock.
- To fix the new LOR, abandon the global unp_list_lock in favor of global
unp_link_lock. Indeed, separating these two locks didn't provide us any
extra parralelism in the UNIX sockets.
- Now call into uipc_attach() may happen with unp_link_lock hold if, we
are accepting, or without unp_link_lock in case if we are just creating
a socket.
- Another problem in UNIX sockets is that uipc_close() basicly did nothing
for a listening socket. The vnode remained opened for connections. This
is fixed by removing vnode in uipc_close(). Maybe the right way would be
to do it for all sockets (not only listening), simply move the vnode
teardown from uipc_detach() to uipc_close()?
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D9770
2017-06-08 21:30:34 +00:00
|
|
|
/* Connection has been removed from the listen queue. */
|
|
|
|
KNOTE_UNLOCKED(&head->so_rdsel.si_note, 0);
|
2000-04-16 18:53:38 +00:00
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
if (flags & ACCEPT4_INHERIT) {
|
|
|
|
pgid = fgetown(&head->so_sigio);
|
|
|
|
if (pgid != 0)
|
|
|
|
fsetown(pgid, &so->so_sigio);
|
|
|
|
} else {
|
|
|
|
fflag &= ~(FNONBLOCK | FASYNC);
|
|
|
|
if (flags & SOCK_NONBLOCK)
|
|
|
|
fflag |= FNONBLOCK;
|
|
|
|
}
|
1996-03-11 15:37:44 +00:00
|
|
|
|
2007-12-30 01:42:15 +00:00
|
|
|
finit(nfp, fflag, DTYPE_SOCKET, so, &socketops);
|
2003-02-23 23:00:28 +00:00
|
|
|
/* Sync socket nonblocking/async state with file flags */
|
|
|
|
tmp = fflag & FNONBLOCK;
|
|
|
|
(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
|
|
|
|
tmp = fflag & FASYNC;
|
|
|
|
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
|
2001-02-14 02:09:11 +00:00
|
|
|
error = soaccept(so, &sa);
|
2014-05-11 21:21:14 +00:00
|
|
|
if (error != 0)
|
2001-02-14 02:09:11 +00:00
|
|
|
goto noconnection;
|
2000-11-18 21:01:04 +00:00
|
|
|
if (sa == NULL) {
|
2006-07-10 21:38:17 +00:00
|
|
|
if (name)
|
|
|
|
*namelen = 0;
|
2000-11-18 21:01:04 +00:00
|
|
|
goto done;
|
1997-08-16 19:16:27 +00:00
|
|
|
}
|
2013-03-02 21:11:30 +00:00
|
|
|
AUDIT_ARG_SOCKADDR(td, AT_FDCWD, sa);
|
2006-07-10 21:38:17 +00:00
|
|
|
if (name) {
|
1997-12-15 02:29:11 +00:00
|
|
|
/* check sa_len before it is destroyed */
|
2006-07-10 21:38:17 +00:00
|
|
|
if (*namelen > sa->sa_len)
|
|
|
|
*namelen = sa->sa_len;
|
2008-02-23 01:01:49 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(sa);
|
|
|
|
#endif
|
2006-07-10 21:38:17 +00:00
|
|
|
*name = sa;
|
|
|
|
sa = NULL;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2001-02-14 02:09:11 +00:00
|
|
|
noconnection:
|
2013-09-05 00:17:38 +00:00
|
|
|
free(sa, M_SONAME);
|
2000-11-18 21:01:04 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* close the new descriptor, assuming someone hasn't ripped it
|
|
|
|
* out from under us.
|
|
|
|
*/
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2015-04-11 15:40:28 +00:00
|
|
|
fdclose(td, nfp, fd);
|
2000-11-18 21:01:04 +00:00
|
|
|
|
|
|
|
/*
|
2006-07-27 19:54:41 +00:00
|
|
|
* Release explicitly held references before returning. We return
|
|
|
|
* a reference on nfp to the caller on success if they request it.
|
2000-11-18 21:01:04 +00:00
|
|
|
*/
|
|
|
|
done:
|
2016-09-22 09:58:46 +00:00
|
|
|
if (nfp == NULL)
|
|
|
|
filecaps_free(&fcaps);
|
2006-07-27 19:54:41 +00:00
|
|
|
if (fp != NULL) {
|
|
|
|
if (error == 0) {
|
|
|
|
*fp = nfp;
|
|
|
|
nfp = NULL;
|
|
|
|
} else
|
|
|
|
*fp = NULL;
|
|
|
|
}
|
2000-11-18 21:01:04 +00:00
|
|
|
if (nfp != NULL)
|
2001-09-12 08:38:13 +00:00
|
|
|
fdrop(nfp, td);
|
2006-04-25 11:48:16 +00:00
|
|
|
fdrop(headfp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1995-10-23 15:42:12 +00:00
|
|
|
int
|
2011-09-16 13:58:51 +00:00
|
|
|
sys_accept(td, uap)
|
2001-09-12 08:38:13 +00:00
|
|
|
struct thread *td;
|
1995-10-23 15:42:12 +00:00
|
|
|
struct accept_args *uap;
|
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
return (accept1(td, uap->s, uap->name, uap->anamelen, ACCEPT4_INHERIT));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
sys_accept4(td, uap)
|
|
|
|
struct thread *td;
|
|
|
|
struct accept4_args *uap;
|
|
|
|
{
|
2013-09-05 00:17:38 +00:00
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
if (uap->flags & ~(SOCK_CLOEXEC | SOCK_NONBLOCK))
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
return (accept1(td, uap->s, uap->name, uap->anamelen, uap->flags));
|
1995-10-23 15:42:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
int
|
2001-09-12 08:38:13 +00:00
|
|
|
oaccept(td, uap)
|
|
|
|
struct thread *td;
|
1995-10-23 15:42:12 +00:00
|
|
|
struct accept_args *uap;
|
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
|
2013-05-01 20:10:21 +00:00
|
|
|
return (accept1(td, uap->s, uap->name, uap->anamelen,
|
|
|
|
ACCEPT4_INHERIT | ACCEPT4_COMPAT));
|
1995-10-23 15:42:12 +00:00
|
|
|
}
|
|
|
|
#endif /* COMPAT_OLDSOCK */
|
1994-10-02 17:35:40 +00:00
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_connect(struct thread *td, struct connect_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
1997-08-16 19:16:27 +00:00
|
|
|
struct sockaddr *sa;
|
2003-02-03 17:36:52 +00:00
|
|
|
int error;
|
|
|
|
|
|
|
|
error = getsockaddr(&sa, uap->name, uap->namelen);
|
2013-03-02 21:11:30 +00:00
|
|
|
if (error == 0) {
|
2014-11-13 18:01:51 +00:00
|
|
|
error = kern_connectat(td, AT_FDCWD, uap->s, sa);
|
2013-03-02 21:11:30 +00:00
|
|
|
free(sa, M_SONAME);
|
|
|
|
}
|
2006-07-19 18:28:52 +00:00
|
|
|
return (error);
|
2003-02-03 17:36:52 +00:00
|
|
|
}
|
|
|
|
|
2014-11-13 18:01:51 +00:00
|
|
|
int
|
2013-03-02 21:11:30 +00:00
|
|
|
kern_connectat(struct thread *td, int dirfd, int fd, struct sockaddr *sa)
|
2003-02-03 17:36:52 +00:00
|
|
|
{
|
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2013-09-05 00:17:38 +00:00
|
|
|
int error, interrupted = 0;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(fd);
|
2013-03-02 21:11:30 +00:00
|
|
|
AUDIT_ARG_SOCKADDR(td, dirfd, sa);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_CONNECT),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2004-10-24 23:45:01 +00:00
|
|
|
so = fp->f_data;
|
2003-08-06 14:04:47 +00:00
|
|
|
if (so->so_state & SS_ISCONNECTING) {
|
2000-11-18 21:01:04 +00:00
|
|
|
error = EALREADY;
|
2001-11-17 03:07:11 +00:00
|
|
|
goto done1;
|
2000-11-18 21:01:04 +00:00
|
|
|
}
|
2008-02-23 01:01:49 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(sa);
|
|
|
|
#endif
|
2002-07-31 16:39:49 +00:00
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_socket_check_connect(td->td_ucred, so, sa);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2002-07-31 16:39:49 +00:00
|
|
|
goto bad;
|
|
|
|
#endif
|
2013-03-02 21:11:30 +00:00
|
|
|
if (dirfd == AT_FDCWD)
|
|
|
|
error = soconnect(so, sa, td);
|
|
|
|
else
|
|
|
|
error = soconnectat(dirfd, so, sa, td);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto bad;
|
|
|
|
if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
|
2000-11-18 21:01:04 +00:00
|
|
|
error = EINPROGRESS;
|
2001-11-17 03:07:11 +00:00
|
|
|
goto done1;
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2004-06-24 01:43:23 +00:00
|
|
|
SOCK_LOCK(so);
|
1994-10-02 17:35:40 +00:00
|
|
|
while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
|
Listening sockets improvements.
o Separate fields of struct socket that belong to listening from
fields that belong to normal dataflow, and unionize them. This
shrinks the structure a bit.
- Take out selinfo's from the socket buffers into the socket. The
first reason is to support braindamaged scenario when a socket is
added to kevent(2) and then listen(2) is cast on it. The second
reason is that there is future plan to make socket buffers pluggable,
so that for a dataflow socket a socket buffer can be changed, and
in this case we also want to keep same selinfos through the lifetime
of a socket.
- Remove struct struct so_accf. Since now listening stuff no longer
affects struct socket size, just move its fields into listening part
of the union.
- Provide sol_upcall field and enforce that so_upcall_set() may be called
only on a dataflow socket, which has buffers, and for listening sockets
provide solisten_upcall_set().
o Remove ACCEPT_LOCK() global.
- Add a mutex to socket, to be used instead of socket buffer lock to lock
fields of struct socket that don't belong to a socket buffer.
- Allow to acquire two socket locks, but the first one must belong to a
listening socket.
- Make soref()/sorele() to use atomic(9). This allows in some situations
to do soref() without owning socket lock. There is place for improvement
here, it is possible to make sorele() also to lock optionally.
- Most protocols aren't touched by this change, except UNIX local sockets.
See below for more information.
o Reduce copy-and-paste in kernel modules that accept connections from
listening sockets: provide function solisten_dequeue(), and use it in
the following modules: ctl(4), iscsi(4), ng_btsocket(4), ng_ksocket(4),
infiniband, rpc.
o UNIX local sockets.
- Removal of ACCEPT_LOCK() global uncovered several races in the UNIX
local sockets. Most races exist around spawning a new socket, when we
are connecting to a local listening socket. To cover them, we need to
hold locks on both PCBs when spawning a third one. This means holding
them across sonewconn(). This creates a LOR between pcb locks and
unp_list_lock.
- To fix the new LOR, abandon the global unp_list_lock in favor of global
unp_link_lock. Indeed, separating these two locks didn't provide us any
extra parralelism in the UNIX sockets.
- Now call into uipc_attach() may happen with unp_link_lock hold if, we
are accepting, or without unp_link_lock in case if we are just creating
a socket.
- Another problem in UNIX sockets is that uipc_close() basicly did nothing
for a listening socket. The vnode remained opened for connections. This
is fixed by removing vnode in uipc_close(). Maybe the right way would be
to do it for all sockets (not only listening), simply move the vnode
teardown from uipc_detach() to uipc_close()?
Sponsored by: Netflix
Differential Revision: https://reviews.freebsd.org/D9770
2017-06-08 21:30:34 +00:00
|
|
|
error = msleep(&so->so_timeo, &so->so_lock, PSOCK | PCATCH,
|
2004-06-24 01:43:23 +00:00
|
|
|
"connec", 0);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2003-08-06 14:04:47 +00:00
|
|
|
if (error == EINTR || error == ERESTART)
|
|
|
|
interrupted = 1;
|
1994-05-24 10:09:53 +00:00
|
|
|
break;
|
2003-08-06 14:04:47 +00:00
|
|
|
}
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
if (error == 0) {
|
|
|
|
error = so->so_error;
|
|
|
|
so->so_error = 0;
|
|
|
|
}
|
2004-06-24 01:43:23 +00:00
|
|
|
SOCK_UNLOCK(so);
|
1994-05-24 10:09:53 +00:00
|
|
|
bad:
|
2003-08-06 14:04:47 +00:00
|
|
|
if (!interrupted)
|
|
|
|
so->so_state &= ~SS_ISCONNECTING;
|
1994-05-24 10:09:53 +00:00
|
|
|
if (error == ERESTART)
|
|
|
|
error = EINTR;
|
2001-11-17 03:07:11 +00:00
|
|
|
done1:
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2013-03-02 21:11:30 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_connectat(struct thread *td, struct connectat_args *uap)
|
2013-03-02 21:11:30 +00:00
|
|
|
{
|
|
|
|
struct sockaddr *sa;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = getsockaddr(&sa, uap->name, uap->namelen);
|
|
|
|
if (error == 0) {
|
|
|
|
error = kern_connectat(td, uap->fd, uap->s, sa);
|
|
|
|
free(sa, M_SONAME);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2009-05-31 12:12:38 +00:00
|
|
|
kern_socketpair(struct thread *td, int domain, int type, int protocol,
|
|
|
|
int *rsv)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct file *fp1, *fp2;
|
|
|
|
struct socket *so1, *so2;
|
2013-03-19 20:58:17 +00:00
|
|
|
int fd, error, oflag, fflag;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-07-01 18:54:49 +00:00
|
|
|
AUDIT_ARG_SOCKET(domain, type, protocol);
|
2013-03-19 20:58:17 +00:00
|
|
|
|
|
|
|
oflag = 0;
|
|
|
|
fflag = 0;
|
|
|
|
if ((type & SOCK_CLOEXEC) != 0) {
|
|
|
|
type &= ~SOCK_CLOEXEC;
|
|
|
|
oflag |= O_CLOEXEC;
|
|
|
|
}
|
|
|
|
if ((type & SOCK_NONBLOCK) != 0) {
|
|
|
|
type &= ~SOCK_NONBLOCK;
|
|
|
|
fflag |= FNONBLOCK;
|
|
|
|
}
|
2005-07-05 22:49:10 +00:00
|
|
|
#ifdef MAC
|
|
|
|
/* We might want to have a separate check for socket pairs. */
|
2009-05-31 12:12:38 +00:00
|
|
|
error = mac_socket_check_create(td->td_ucred, domain, type,
|
|
|
|
protocol);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2005-07-05 22:49:10 +00:00
|
|
|
return (error);
|
|
|
|
#endif
|
2009-05-31 12:12:38 +00:00
|
|
|
error = socreate(domain, &so1, type, protocol, td->td_ucred, td);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2009-05-31 12:12:38 +00:00
|
|
|
error = socreate(domain, &so2, type, protocol, td->td_ucred, td);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto free1;
|
2003-10-19 20:41:07 +00:00
|
|
|
/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
|
2013-03-19 20:58:17 +00:00
|
|
|
error = falloc(td, &fp1, &fd, oflag);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto free2;
|
2009-05-31 12:12:38 +00:00
|
|
|
rsv[0] = fd;
|
2003-01-13 00:33:17 +00:00
|
|
|
fp1->f_data = so1; /* so1 already has ref count */
|
2013-03-19 20:58:17 +00:00
|
|
|
error = falloc(td, &fp2, &fd, oflag);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto free3;
|
2003-01-13 00:33:17 +00:00
|
|
|
fp2->f_data = so2; /* so2 already has ref count */
|
2009-05-31 12:12:38 +00:00
|
|
|
rsv[1] = fd;
|
1994-10-02 17:35:40 +00:00
|
|
|
error = soconnect2(so1, so2);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto free4;
|
2009-05-31 12:12:38 +00:00
|
|
|
if (type == SOCK_DGRAM) {
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Datagram socket connection is asymmetric.
|
|
|
|
*/
|
1994-10-02 17:35:40 +00:00
|
|
|
error = soconnect2(so2, so1);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto free4;
|
|
|
|
}
|
2013-03-19 20:58:17 +00:00
|
|
|
finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
|
|
|
|
&socketops);
|
|
|
|
finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data,
|
|
|
|
&socketops);
|
|
|
|
if ((fflag & FNONBLOCK) != 0) {
|
|
|
|
(void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td);
|
|
|
|
(void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td);
|
|
|
|
}
|
2001-09-12 08:38:13 +00:00
|
|
|
fdrop(fp1, td);
|
|
|
|
fdrop(fp2, td);
|
2007-08-06 14:26:03 +00:00
|
|
|
return (0);
|
1994-05-24 10:09:53 +00:00
|
|
|
free4:
|
2015-04-11 15:40:28 +00:00
|
|
|
fdclose(td, fp2, rsv[1]);
|
2001-09-12 08:38:13 +00:00
|
|
|
fdrop(fp2, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
free3:
|
2015-04-11 15:40:28 +00:00
|
|
|
fdclose(td, fp1, rsv[0]);
|
2001-09-12 08:38:13 +00:00
|
|
|
fdrop(fp1, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
free2:
|
2007-04-02 19:15:47 +00:00
|
|
|
if (so2 != NULL)
|
|
|
|
(void)soclose(so2);
|
1994-05-24 10:09:53 +00:00
|
|
|
free1:
|
2007-04-02 19:15:47 +00:00
|
|
|
if (so1 != NULL)
|
|
|
|
(void)soclose(so1);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2009-05-31 12:12:38 +00:00
|
|
|
int
|
2011-09-16 13:58:51 +00:00
|
|
|
sys_socketpair(struct thread *td, struct socketpair_args *uap)
|
2009-05-31 12:12:38 +00:00
|
|
|
{
|
|
|
|
int error, sv[2];
|
|
|
|
|
|
|
|
error = kern_socketpair(td, uap->domain, uap->type,
|
|
|
|
uap->protocol, sv);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2009-05-31 12:12:38 +00:00
|
|
|
return (error);
|
|
|
|
error = copyout(sv, uap->rsv, 2 * sizeof(int));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2009-05-31 12:12:38 +00:00
|
|
|
(void)kern_close(td, sv[0]);
|
|
|
|
(void)kern_close(td, sv[1]);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1998-02-09 06:11:36 +00:00
|
|
|
static int
|
2016-09-09 17:40:26 +00:00
|
|
|
sendit(struct thread *td, int s, struct msghdr *mp, int flags)
|
1994-10-02 17:35:40 +00:00
|
|
|
{
|
2016-09-26 15:45:30 +00:00
|
|
|
struct mbuf *control;
|
2003-05-05 20:33:38 +00:00
|
|
|
struct sockaddr *to;
|
|
|
|
int error;
|
2002-10-06 14:39:15 +00:00
|
|
|
|
2011-06-30 10:56:02 +00:00
|
|
|
#ifdef CAPABILITY_MODE
|
|
|
|
if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL))
|
|
|
|
return (ECAPMODE);
|
|
|
|
#endif
|
|
|
|
|
2003-05-05 20:33:38 +00:00
|
|
|
if (mp->msg_name != NULL) {
|
1997-08-16 19:16:27 +00:00
|
|
|
error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2003-05-29 18:36:26 +00:00
|
|
|
to = NULL;
|
|
|
|
goto bad;
|
|
|
|
}
|
2003-05-05 20:33:38 +00:00
|
|
|
mp->msg_name = to;
|
2004-01-11 19:56:42 +00:00
|
|
|
} else {
|
2003-05-05 20:33:38 +00:00
|
|
|
to = NULL;
|
2004-01-11 19:56:42 +00:00
|
|
|
}
|
2003-05-05 20:33:38 +00:00
|
|
|
|
1994-10-02 17:35:40 +00:00
|
|
|
if (mp->msg_control) {
|
|
|
|
if (mp->msg_controllen < sizeof(struct cmsghdr)
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
&& mp->msg_flags != MSG_COMPAT
|
|
|
|
#endif
|
|
|
|
) {
|
|
|
|
error = EINVAL;
|
|
|
|
goto bad;
|
|
|
|
}
|
|
|
|
error = sockargs(&control, mp->msg_control,
|
|
|
|
mp->msg_controllen, MT_CONTROL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-10-02 17:35:40 +00:00
|
|
|
goto bad;
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
if (mp->msg_flags == MSG_COMPAT) {
|
2007-05-16 20:41:08 +00:00
|
|
|
struct cmsghdr *cm;
|
1994-10-02 17:35:40 +00:00
|
|
|
|
2012-12-05 08:04:20 +00:00
|
|
|
M_PREPEND(control, sizeof(*cm), M_WAITOK);
|
2008-03-25 09:39:02 +00:00
|
|
|
cm = mtod(control, struct cmsghdr *);
|
|
|
|
cm->cmsg_len = control->m_len;
|
|
|
|
cm->cmsg_level = SOL_SOCKET;
|
|
|
|
cm->cmsg_type = SCM_RIGHTS;
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
|
|
|
#endif
|
2000-11-18 21:01:04 +00:00
|
|
|
} else {
|
2003-05-05 20:33:38 +00:00
|
|
|
control = NULL;
|
|
|
|
}
|
|
|
|
|
2005-01-30 07:20:36 +00:00
|
|
|
error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
|
2003-05-05 20:33:38 +00:00
|
|
|
|
|
|
|
bad:
|
2013-09-05 00:17:38 +00:00
|
|
|
free(to, M_SONAME);
|
2003-05-05 20:33:38 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
kern_sendit(struct thread *td, int s, struct msghdr *mp, int flags,
|
|
|
|
struct mbuf *control, enum uio_seg segflg)
|
2003-05-05 20:33:38 +00:00
|
|
|
{
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
2003-05-05 20:33:38 +00:00
|
|
|
struct uio auio;
|
|
|
|
struct iovec *iov;
|
|
|
|
struct socket *so;
|
2011-08-11 12:30:23 +00:00
|
|
|
cap_rights_t rights;
|
2003-05-05 20:33:38 +00:00
|
|
|
#ifdef KTRACE
|
2004-07-10 15:42:16 +00:00
|
|
|
struct uio *ktruio = NULL;
|
2003-05-05 20:33:38 +00:00
|
|
|
#endif
|
2013-09-05 00:17:38 +00:00
|
|
|
ssize_t len;
|
|
|
|
int i, error;
|
2003-05-05 20:33:38 +00:00
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_init(&rights, CAP_SEND);
|
2013-02-07 00:36:00 +00:00
|
|
|
if (mp->msg_name != NULL) {
|
2013-03-02 21:11:30 +00:00
|
|
|
AUDIT_ARG_SOCKADDR(td, AT_FDCWD, mp->msg_name);
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_set(&rights, CAP_CONNECT);
|
2013-02-07 00:36:00 +00:00
|
|
|
}
|
2016-09-22 09:58:46 +00:00
|
|
|
error = getsock_cap(td, s, &rights, &fp, NULL, NULL);
|
2016-10-21 18:27:30 +00:00
|
|
|
if (error != 0) {
|
|
|
|
m_freem(control);
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2016-10-21 18:27:30 +00:00
|
|
|
}
|
2004-10-24 23:45:01 +00:00
|
|
|
so = (struct socket *)fp->f_data;
|
2003-05-05 20:33:38 +00:00
|
|
|
|
2011-06-07 17:40:33 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(mp->msg_name);
|
|
|
|
#endif
|
2003-05-05 20:33:38 +00:00
|
|
|
#ifdef MAC
|
2009-06-02 18:26:17 +00:00
|
|
|
if (mp->msg_name != NULL) {
|
2008-05-22 07:18:54 +00:00
|
|
|
error = mac_socket_check_connect(td->td_ucred, so,
|
|
|
|
mp->msg_name);
|
2016-10-21 18:27:30 +00:00
|
|
|
if (error != 0) {
|
|
|
|
m_freem(control);
|
2009-06-02 18:26:17 +00:00
|
|
|
goto bad;
|
2016-10-21 18:27:30 +00:00
|
|
|
}
|
2009-06-02 18:26:17 +00:00
|
|
|
}
|
|
|
|
error = mac_socket_check_send(td->td_ucred, so);
|
2016-10-21 18:27:30 +00:00
|
|
|
if (error != 0) {
|
|
|
|
m_freem(control);
|
2003-05-05 20:33:38 +00:00
|
|
|
goto bad;
|
2016-10-21 18:27:30 +00:00
|
|
|
}
|
2003-05-05 20:33:38 +00:00
|
|
|
#endif
|
|
|
|
|
|
|
|
auio.uio_iov = mp->msg_iov;
|
|
|
|
auio.uio_iovcnt = mp->msg_iovlen;
|
2005-01-30 07:20:36 +00:00
|
|
|
auio.uio_segflg = segflg;
|
2003-05-05 20:33:38 +00:00
|
|
|
auio.uio_rw = UIO_WRITE;
|
|
|
|
auio.uio_td = td;
|
|
|
|
auio.uio_offset = 0; /* XXX */
|
|
|
|
auio.uio_resid = 0;
|
|
|
|
iov = mp->msg_iov;
|
|
|
|
for (i = 0; i < mp->msg_iovlen; i++, iov++) {
|
|
|
|
if ((auio.uio_resid += iov->iov_len) < 0) {
|
|
|
|
error = EINVAL;
|
2016-10-21 18:27:30 +00:00
|
|
|
m_freem(control);
|
2003-05-05 20:33:38 +00:00
|
|
|
goto bad;
|
|
|
|
}
|
2000-11-18 21:01:04 +00:00
|
|
|
}
|
1994-10-02 17:35:40 +00:00
|
|
|
#ifdef KTRACE
|
2004-07-10 15:42:16 +00:00
|
|
|
if (KTRPOINT(td, KTR_GENIO))
|
|
|
|
ktruio = cloneuio(&auio);
|
1994-10-02 17:35:40 +00:00
|
|
|
#endif
|
|
|
|
len = auio.uio_resid;
|
soreceive_generic(), and sopoll_generic(). Add new functions sosend(),
soreceive(), and sopoll(), which are wrappers for pru_sosend,
pru_soreceive, and pru_sopoll, and are now used univerally by socket
consumers rather than either directly invoking the old so*() functions
or directly invoking the protocol switch method (about an even split
prior to this commit).
This completes an architectural change that was begun in 1996 to permit
protocols to provide substitute implementations, as now used by UDP.
Consumers now uniformly invoke sosend(), soreceive(), and sopoll() to
perform these operations on sockets -- in particular, distributed file
systems and socket system calls.
Architectural head nod: sam, gnn, wollman
2006-07-24 15:20:08 +00:00
|
|
|
error = sosend(so, mp->msg_name, &auio, 0, control, flags, td);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
1994-10-02 17:35:40 +00:00
|
|
|
if (auio.uio_resid != len && (error == ERESTART ||
|
|
|
|
error == EINTR || error == EWOULDBLOCK))
|
|
|
|
error = 0;
|
2002-06-20 18:52:54 +00:00
|
|
|
/* Generation of SIGPIPE can be controlled per socket */
|
2005-03-08 16:11:41 +00:00
|
|
|
if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
|
|
|
|
!(flags & MSG_NOSIGNAL)) {
|
2001-09-12 08:38:13 +00:00
|
|
|
PROC_LOCK(td->td_proc);
|
2010-06-29 20:44:19 +00:00
|
|
|
tdsignal(td, SIGPIPE);
|
2001-09-12 08:38:13 +00:00
|
|
|
PROC_UNLOCK(td->td_proc);
|
2001-03-07 03:37:06 +00:00
|
|
|
}
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
|
|
|
if (error == 0)
|
2001-09-12 08:38:13 +00:00
|
|
|
td->td_retval[0] = len - auio.uio_resid;
|
1994-10-02 17:35:40 +00:00
|
|
|
#ifdef KTRACE
|
2004-07-10 15:42:16 +00:00
|
|
|
if (ktruio != NULL) {
|
|
|
|
ktruio->uio_resid = td->td_retval[0];
|
|
|
|
ktrgenio(s, UIO_WRITE, ktruio, error);
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
bad:
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
1994-10-02 17:35:40 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_sendto(struct thread *td, struct sendto_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
|
|
|
struct iovec aiov;
|
|
|
|
|
|
|
|
msg.msg_name = uap->to;
|
|
|
|
msg.msg_namelen = uap->tolen;
|
|
|
|
msg.msg_iov = &aiov;
|
|
|
|
msg.msg_iovlen = 1;
|
|
|
|
msg.msg_control = 0;
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
msg.msg_flags = 0;
|
|
|
|
#endif
|
|
|
|
aiov.iov_base = uap->buf;
|
|
|
|
aiov.iov_len = uap->len;
|
2013-09-05 00:17:38 +00:00
|
|
|
return (sendit(td, uap->s, &msg, uap->flags));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
osend(struct thread *td, struct osend_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
|
|
|
struct iovec aiov;
|
|
|
|
|
|
|
|
msg.msg_name = 0;
|
|
|
|
msg.msg_namelen = 0;
|
|
|
|
msg.msg_iov = &aiov;
|
|
|
|
msg.msg_iovlen = 1;
|
|
|
|
aiov.iov_base = uap->buf;
|
|
|
|
aiov.iov_len = uap->len;
|
|
|
|
msg.msg_control = 0;
|
|
|
|
msg.msg_flags = 0;
|
2013-09-05 00:17:38 +00:00
|
|
|
return (sendit(td, uap->s, &msg, uap->flags));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
osendmsg(struct thread *td, struct osendmsg_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
2004-07-10 15:42:16 +00:00
|
|
|
struct iovec *iov;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2002-06-28 23:48:23 +00:00
|
|
|
error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
|
|
|
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_iov = iov;
|
2004-07-10 15:42:16 +00:00
|
|
|
msg.msg_flags = MSG_COMPAT;
|
2001-09-12 08:38:13 +00:00
|
|
|
error = sendit(td, uap->s, &msg, uap->flags);
|
2004-07-10 15:42:16 +00:00
|
|
|
free(iov, M_IOV);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_sendmsg(struct thread *td, struct sendmsg_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
2004-07-10 15:42:16 +00:00
|
|
|
struct iovec *iov;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2002-06-28 23:48:23 +00:00
|
|
|
error = copyin(uap->msg, &msg, sizeof (msg));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
|
|
|
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_iov = iov;
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
msg.msg_flags = 0;
|
|
|
|
#endif
|
2001-09-12 08:38:13 +00:00
|
|
|
error = sendit(td, uap->s, &msg, uap->flags);
|
2004-07-10 15:42:16 +00:00
|
|
|
free(iov, M_IOV);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2005-10-15 05:57:06 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
|
|
|
|
struct mbuf **controlp)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct uio auio;
|
2004-07-10 15:42:16 +00:00
|
|
|
struct iovec *iov;
|
2013-02-07 00:27:11 +00:00
|
|
|
struct mbuf *m, *control = NULL;
|
1996-05-09 20:15:26 +00:00
|
|
|
caddr_t ctlbuf;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
1997-04-27 20:01:29 +00:00
|
|
|
struct socket *so;
|
2013-02-07 00:27:11 +00:00
|
|
|
struct sockaddr *fromsa = NULL;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
2004-07-10 15:42:16 +00:00
|
|
|
struct uio *ktruio = NULL;
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
2013-09-05 00:17:38 +00:00
|
|
|
ssize_t len;
|
|
|
|
int error, i;
|
1995-05-30 08:16:23 +00:00
|
|
|
|
2010-02-18 22:12:40 +00:00
|
|
|
if (controlp != NULL)
|
|
|
|
*controlp = NULL;
|
2005-10-31 21:09:56 +00:00
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, s, cap_rights_init(&rights, CAP_RECV),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
2004-10-24 23:45:01 +00:00
|
|
|
so = fp->f_data;
|
2002-10-06 14:39:15 +00:00
|
|
|
|
|
|
|
#ifdef MAC
|
2007-10-24 19:04:04 +00:00
|
|
|
error = mac_socket_check_receive(td->td_ucred, so);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
2002-10-06 14:39:15 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
auio.uio_iov = mp->msg_iov;
|
|
|
|
auio.uio_iovcnt = mp->msg_iovlen;
|
2006-07-10 21:38:17 +00:00
|
|
|
auio.uio_segflg = UIO_USERSPACE;
|
1994-10-02 17:35:40 +00:00
|
|
|
auio.uio_rw = UIO_READ;
|
2001-09-12 08:38:13 +00:00
|
|
|
auio.uio_td = td;
|
1994-05-24 10:09:53 +00:00
|
|
|
auio.uio_offset = 0; /* XXX */
|
|
|
|
auio.uio_resid = 0;
|
|
|
|
iov = mp->msg_iov;
|
|
|
|
for (i = 0; i < mp->msg_iovlen; i++, iov++) {
|
2000-11-18 21:01:04 +00:00
|
|
|
if ((auio.uio_resid += iov->iov_len) < 0) {
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (EINVAL);
|
2000-11-18 21:01:04 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
#ifdef KTRACE
|
2004-07-10 15:42:16 +00:00
|
|
|
if (KTRPOINT(td, KTR_GENIO))
|
|
|
|
ktruio = cloneuio(&auio);
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
|
|
|
len = auio.uio_resid;
|
2013-02-07 00:27:11 +00:00
|
|
|
error = soreceive(so, &fromsa, &auio, NULL,
|
|
|
|
(mp->msg_control || controlp) ? &control : NULL,
|
1994-10-02 17:35:40 +00:00
|
|
|
&mp->msg_flags);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2012-02-21 01:05:12 +00:00
|
|
|
if (auio.uio_resid != len && (error == ERESTART ||
|
1994-05-24 10:09:53 +00:00
|
|
|
error == EINTR || error == EWOULDBLOCK))
|
|
|
|
error = 0;
|
|
|
|
}
|
2013-02-07 00:36:00 +00:00
|
|
|
if (fromsa != NULL)
|
2013-03-02 21:11:30 +00:00
|
|
|
AUDIT_ARG_SOCKADDR(td, AT_FDCWD, fromsa);
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
2004-07-10 15:42:16 +00:00
|
|
|
if (ktruio != NULL) {
|
2012-02-21 01:05:12 +00:00
|
|
|
ktruio->uio_resid = len - auio.uio_resid;
|
2004-07-10 15:42:16 +00:00
|
|
|
ktrgenio(s, UIO_READ, ktruio, error);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
#endif
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-10-02 17:35:40 +00:00
|
|
|
goto out;
|
2012-02-21 01:05:12 +00:00
|
|
|
td->td_retval[0] = len - auio.uio_resid;
|
1994-10-02 17:35:40 +00:00
|
|
|
if (mp->msg_name) {
|
|
|
|
len = mp->msg_namelen;
|
2013-02-07 00:27:11 +00:00
|
|
|
if (len <= 0 || fromsa == NULL)
|
1994-10-02 17:35:40 +00:00
|
|
|
len = 0;
|
|
|
|
else {
|
1997-12-14 03:15:21 +00:00
|
|
|
/* save sa_len before it is destroyed by MSG_COMPAT */
|
|
|
|
len = MIN(len, fromsa->sa_len);
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
1994-10-02 17:35:40 +00:00
|
|
|
if (mp->msg_flags & MSG_COMPAT)
|
1997-08-16 19:16:27 +00:00
|
|
|
((struct osockaddr *)fromsa)->sa_family =
|
|
|
|
fromsa->sa_family;
|
|
|
|
#endif
|
2006-07-10 21:38:17 +00:00
|
|
|
if (fromseg == UIO_USERSPACE) {
|
|
|
|
error = copyout(fromsa, mp->msg_name,
|
|
|
|
(unsigned)len);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
goto out;
|
|
|
|
} else
|
|
|
|
bcopy(fromsa, mp->msg_name, len);
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
|
|
|
mp->msg_namelen = len;
|
|
|
|
}
|
2005-10-31 21:09:56 +00:00
|
|
|
if (mp->msg_control && controlp == NULL) {
|
1994-10-02 17:35:40 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
/*
|
|
|
|
* We assume that old recvmsg calls won't receive access
|
|
|
|
* rights and other control info, esp. as control info
|
|
|
|
* is always optional and those options didn't exist in 4.3.
|
|
|
|
* If we receive rights, trim the cmsghdr; anything else
|
|
|
|
* is tossed.
|
|
|
|
*/
|
|
|
|
if (control && mp->msg_flags & MSG_COMPAT) {
|
|
|
|
if (mtod(control, struct cmsghdr *)->cmsg_level !=
|
|
|
|
SOL_SOCKET ||
|
|
|
|
mtod(control, struct cmsghdr *)->cmsg_type !=
|
|
|
|
SCM_RIGHTS) {
|
|
|
|
mp->msg_controllen = 0;
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
control->m_len -= sizeof (struct cmsghdr);
|
|
|
|
control->m_data += sizeof (struct cmsghdr);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
len = mp->msg_controllen;
|
1996-05-09 20:15:26 +00:00
|
|
|
m = control;
|
|
|
|
mp->msg_controllen = 0;
|
2002-06-28 23:48:23 +00:00
|
|
|
ctlbuf = mp->msg_control;
|
1996-05-09 20:15:26 +00:00
|
|
|
|
|
|
|
while (m && len > 0) {
|
|
|
|
unsigned int tocopy;
|
|
|
|
|
2004-01-11 19:56:42 +00:00
|
|
|
if (len >= m->m_len)
|
1996-05-09 20:15:26 +00:00
|
|
|
tocopy = m->m_len;
|
|
|
|
else {
|
1994-10-02 17:35:40 +00:00
|
|
|
mp->msg_flags |= MSG_CTRUNC;
|
1996-05-09 20:15:26 +00:00
|
|
|
tocopy = len;
|
|
|
|
}
|
2004-01-11 19:56:42 +00:00
|
|
|
|
2002-06-28 23:48:23 +00:00
|
|
|
if ((error = copyout(mtod(m, caddr_t),
|
1999-01-27 21:50:00 +00:00
|
|
|
ctlbuf, tocopy)) != 0)
|
1996-05-09 20:15:26 +00:00
|
|
|
goto out;
|
|
|
|
|
|
|
|
ctlbuf += tocopy;
|
|
|
|
len -= tocopy;
|
|
|
|
m = m->m_next;
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
1999-11-24 20:49:04 +00:00
|
|
|
mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
|
|
|
out:
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
2008-02-23 01:01:49 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (fromsa && KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(fromsa);
|
|
|
|
#endif
|
2013-09-05 00:17:38 +00:00
|
|
|
free(fromsa, M_SONAME);
|
2005-10-31 21:09:56 +00:00
|
|
|
|
2013-02-07 00:27:11 +00:00
|
|
|
if (error == 0 && controlp != NULL)
|
2005-10-31 21:09:56 +00:00
|
|
|
*controlp = control;
|
|
|
|
else if (control)
|
1994-10-02 17:35:40 +00:00
|
|
|
m_freem(control);
|
2005-10-31 21:09:56 +00:00
|
|
|
|
1994-10-02 17:35:40 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2005-10-15 05:57:06 +00:00
|
|
|
static int
|
2016-09-09 17:40:26 +00:00
|
|
|
recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp)
|
2005-10-15 05:57:06 +00:00
|
|
|
{
|
2006-07-10 21:38:17 +00:00
|
|
|
int error;
|
2005-10-15 05:57:06 +00:00
|
|
|
|
2006-07-10 21:38:17 +00:00
|
|
|
error = kern_recvit(td, s, mp, UIO_USERSPACE, NULL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (namelenp != NULL) {
|
2006-07-10 21:38:17 +00:00
|
|
|
error = copyout(&mp->msg_namelen, namelenp, sizeof (socklen_t));
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
if (mp->msg_flags & MSG_COMPAT)
|
|
|
|
error = 0; /* old recvfrom didn't check */
|
|
|
|
#endif
|
|
|
|
}
|
|
|
|
return (error);
|
2005-10-15 05:57:06 +00:00
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_recvfrom(struct thread *td, struct recvfrom_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
|
|
|
struct iovec aiov;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (uap->fromlenaddr) {
|
2002-06-28 23:48:23 +00:00
|
|
|
error = copyin(uap->fromlenaddr,
|
|
|
|
&msg.msg_namelen, sizeof (msg.msg_namelen));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
goto done2;
|
|
|
|
} else {
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_namelen = 0;
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_name = uap->from;
|
|
|
|
msg.msg_iov = &aiov;
|
|
|
|
msg.msg_iovlen = 1;
|
|
|
|
aiov.iov_base = uap->buf;
|
|
|
|
aiov.iov_len = uap->len;
|
|
|
|
msg.msg_control = 0;
|
|
|
|
msg.msg_flags = uap->flags;
|
2002-06-29 00:02:01 +00:00
|
|
|
error = recvit(td, uap->s, &msg, uap->fromlenaddr);
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
done2:
|
2013-09-05 00:17:38 +00:00
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1994-10-02 17:35:40 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
orecvfrom(struct thread *td, struct recvfrom_args *uap)
|
1994-10-02 17:35:40 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
uap->flags |= MSG_COMPAT;
|
2011-09-16 13:58:51 +00:00
|
|
|
return (sys_recvfrom(td, uap));
|
1994-10-02 17:35:40 +00:00
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-10 09:00:12 +00:00
|
|
|
orecv(struct thread *td, struct orecv_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
|
|
|
struct iovec aiov;
|
|
|
|
|
|
|
|
msg.msg_name = 0;
|
|
|
|
msg.msg_namelen = 0;
|
|
|
|
msg.msg_iov = &aiov;
|
|
|
|
msg.msg_iovlen = 1;
|
|
|
|
aiov.iov_base = uap->buf;
|
|
|
|
aiov.iov_len = uap->len;
|
|
|
|
msg.msg_control = 0;
|
|
|
|
msg.msg_flags = uap->flags;
|
2013-09-05 00:17:38 +00:00
|
|
|
return (recvit(td, uap->s, &msg, NULL));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Old recvmsg. This code takes advantage of the fact that the old msghdr
|
|
|
|
* overlays the new one, missing only the flags, and with the (old) access
|
|
|
|
* rights where the control fields are now.
|
|
|
|
*/
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
orecvmsg(struct thread *td, struct orecvmsg_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
2004-07-10 15:42:16 +00:00
|
|
|
struct iovec *iov;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2002-06-29 00:02:01 +00:00
|
|
|
error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
2004-07-10 15:42:16 +00:00
|
|
|
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
|
|
|
msg.msg_flags = uap->flags | MSG_COMPAT;
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_iov = iov;
|
2002-06-29 00:02:01 +00:00
|
|
|
error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
|
1994-05-24 10:09:53 +00:00
|
|
|
if (msg.msg_controllen && error == 0)
|
2002-06-29 00:02:01 +00:00
|
|
|
error = copyout(&msg.msg_controllen,
|
|
|
|
&uap->msg->msg_accrightslen, sizeof (int));
|
2004-07-10 15:42:16 +00:00
|
|
|
free(iov, M_IOV);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_recvmsg(struct thread *td, struct recvmsg_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
|
|
|
struct msghdr msg;
|
2004-07-10 15:42:16 +00:00
|
|
|
struct iovec *uiov, *iov;
|
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2002-06-29 00:02:01 +00:00
|
|
|
error = copyin(uap->msg, &msg, sizeof (msg));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
|
|
|
error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-10 15:42:16 +00:00
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_flags = uap->flags;
|
2004-07-10 15:42:16 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
msg.msg_flags &= ~MSG_COMPAT;
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
|
|
|
uiov = msg.msg_iov;
|
|
|
|
msg.msg_iov = iov;
|
2002-06-29 00:02:01 +00:00
|
|
|
error = recvit(td, uap->s, &msg, NULL);
|
2004-07-10 15:42:16 +00:00
|
|
|
if (error == 0) {
|
1994-05-24 10:09:53 +00:00
|
|
|
msg.msg_iov = uiov;
|
2002-06-29 00:02:01 +00:00
|
|
|
error = copyout(&msg, uap->msg, sizeof(msg));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2004-07-10 15:42:16 +00:00
|
|
|
free(iov, M_IOV);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
1995-10-07 23:47:26 +00:00
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_shutdown(struct thread *td, struct shutdown_args *uap)
|
2017-01-30 12:57:22 +00:00
|
|
|
{
|
|
|
|
|
|
|
|
return (kern_shutdown(td, uap->s, uap->how));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_shutdown(struct thread *td, int s, int how)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2001-11-17 03:07:11 +00:00
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2017-01-30 12:57:22 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
|
|
|
error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SHUTDOWN),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2004-10-24 23:45:01 +00:00
|
|
|
if (error == 0) {
|
|
|
|
so = fp->f_data;
|
2017-01-30 12:57:22 +00:00
|
|
|
error = soshutdown(so, how);
|
Make shutdown() return ENOTCONN as required by POSIX, part deux.
Summary:
Back in 2005, maxim@ attempted to fix shutdown() to return ENOTCONN in case the socket was not connected (r150152). This had to be rolled back (r150155), as it broke some of the existing programs that depend on this behavior. I reapplied this change on my system and indeed, syslogd failed to start up. I fixed this back in February (279016) and MFC'ed it to the supported stable branches. Apart from that, things seem to work out all right.
Since at least Linux and Mac OS X do the right thing, I'd like to go ahead and give this another try. To keep old copies of syslogd working, only start returning ENOTCONN for recent binaries.
I took a look at the XNU sources and they seem to test against both SS_ISCONNECTED, SS_ISCONNECTING and SS_ISDISCONNECTING, instead of just SS_ISCONNECTED. That seams reasonable, so let's do the same.
Test Plan:
This issue was uncovered while writing tests for shutdown() in CloudABI:
https://github.com/NuxiNL/cloudlibc/blob/master/src/libc/sys/socket/shutdown_test.c#L26
Reviewers: glebius, rwatson, #manpages, gnn, #network
Reviewed By: gnn, #network
Subscribers: bms, mjg, imp
Differential Revision: https://reviews.freebsd.org/D3039
2015-07-27 13:17:57 +00:00
|
|
|
/*
|
|
|
|
* Previous versions did not return ENOTCONN, but 0 in
|
|
|
|
* case the socket was not connected. Some important
|
|
|
|
* programs like syslogd up to r279016, 2015-02-19,
|
|
|
|
* still depend on this behavior.
|
|
|
|
*/
|
|
|
|
if (error == ENOTCONN &&
|
|
|
|
td->td_proc->p_osrel < P_OSREL_SHUTDOWN_ENOTCONN)
|
|
|
|
error = 0;
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
}
|
2004-10-24 23:45:01 +00:00
|
|
|
return (error);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_setsockopt(struct thread *td, struct setsockopt_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2004-07-17 21:06:36 +00:00
|
|
|
|
|
|
|
return (kern_setsockopt(td, uap->s, uap->level, uap->name,
|
|
|
|
uap->val, UIO_USERSPACE, uap->valsize));
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
kern_setsockopt(struct thread *td, int s, int level, int name, void *val,
|
|
|
|
enum uio_seg valseg, socklen_t valsize)
|
2004-07-17 21:06:36 +00:00
|
|
|
{
|
2001-11-17 03:07:11 +00:00
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
1998-08-23 03:07:17 +00:00
|
|
|
struct sockopt sopt;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2013-09-05 00:17:38 +00:00
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-07-17 21:06:36 +00:00
|
|
|
if (val == NULL && valsize != 0)
|
1998-08-23 03:07:17 +00:00
|
|
|
return (EFAULT);
|
2006-06-20 12:36:40 +00:00
|
|
|
if ((int)valsize < 0)
|
1998-08-23 03:07:17 +00:00
|
|
|
return (EINVAL);
|
|
|
|
|
2004-07-17 21:06:36 +00:00
|
|
|
sopt.sopt_dir = SOPT_SET;
|
|
|
|
sopt.sopt_level = level;
|
|
|
|
sopt.sopt_name = name;
|
|
|
|
sopt.sopt_val = val;
|
|
|
|
sopt.sopt_valsize = valsize;
|
|
|
|
switch (valseg) {
|
|
|
|
case UIO_USERSPACE:
|
2001-09-12 08:38:13 +00:00
|
|
|
sopt.sopt_td = td;
|
2004-07-17 21:06:36 +00:00
|
|
|
break;
|
|
|
|
case UIO_SYSSPACE:
|
|
|
|
sopt.sopt_td = NULL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
panic("kern_setsockopt called with bad valseg");
|
|
|
|
}
|
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SETSOCKOPT),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2004-10-24 23:45:01 +00:00
|
|
|
if (error == 0) {
|
|
|
|
so = fp->f_data;
|
2001-11-17 03:07:11 +00:00
|
|
|
error = sosetopt(so, &sopt);
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
}
|
2000-11-18 21:01:04 +00:00
|
|
|
return(error);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_getsockopt(struct thread *td, struct getsockopt_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2003-12-24 18:47:43 +00:00
|
|
|
socklen_t valsize;
|
2013-09-05 00:17:38 +00:00
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
if (uap->val) {
|
2002-06-29 00:02:01 +00:00
|
|
|
error = copyin(uap->avalsize, &valsize, sizeof (valsize));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2004-07-17 21:06:36 +00:00
|
|
|
return (error);
|
2000-11-18 21:01:04 +00:00
|
|
|
}
|
1998-08-23 03:07:17 +00:00
|
|
|
|
2004-07-17 21:06:36 +00:00
|
|
|
error = kern_getsockopt(td, uap->s, uap->level, uap->name,
|
|
|
|
uap->val, UIO_USERSPACE, &valsize);
|
1998-08-23 03:07:17 +00:00
|
|
|
|
2004-07-17 21:06:36 +00:00
|
|
|
if (error == 0)
|
2002-06-29 00:02:01 +00:00
|
|
|
error = copyout(&valsize, uap->avalsize, sizeof (valsize));
|
2004-07-17 21:06:36 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Kernel version of getsockopt.
|
|
|
|
* optval can be a userland or userspace. optlen is always a kernel pointer.
|
|
|
|
*/
|
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
kern_getsockopt(struct thread *td, int s, int level, int name, void *val,
|
|
|
|
enum uio_seg valseg, socklen_t *valsize)
|
2004-07-17 21:06:36 +00:00
|
|
|
{
|
2013-09-05 00:17:38 +00:00
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
2013-09-05 00:17:38 +00:00
|
|
|
struct sockopt sopt;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2013-09-05 00:17:38 +00:00
|
|
|
int error;
|
2004-07-17 21:06:36 +00:00
|
|
|
|
|
|
|
if (val == NULL)
|
|
|
|
*valsize = 0;
|
2006-06-20 12:36:40 +00:00
|
|
|
if ((int)*valsize < 0)
|
2004-07-17 21:06:36 +00:00
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
sopt.sopt_dir = SOPT_GET;
|
|
|
|
sopt.sopt_level = level;
|
|
|
|
sopt.sopt_name = name;
|
|
|
|
sopt.sopt_val = val;
|
|
|
|
sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
|
|
|
|
switch (valseg) {
|
|
|
|
case UIO_USERSPACE:
|
|
|
|
sopt.sopt_td = td;
|
|
|
|
break;
|
|
|
|
case UIO_SYSSPACE:
|
|
|
|
sopt.sopt_td = NULL;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
panic("kern_getsockopt called with bad valseg");
|
|
|
|
}
|
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(s);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, s, cap_rights_init(&rights, CAP_GETSOCKOPT),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2004-10-24 23:45:01 +00:00
|
|
|
if (error == 0) {
|
|
|
|
so = fp->f_data;
|
2004-07-17 21:06:36 +00:00
|
|
|
error = sogetopt(so, &sopt);
|
|
|
|
*valsize = sopt.sopt_valsize;
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2004-01-11 19:56:42 +00:00
|
|
|
* getsockname1() - Get socket name.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
1995-10-07 23:47:26 +00:00
|
|
|
static int
|
2016-09-09 17:40:26 +00:00
|
|
|
getsockname1(struct thread *td, struct getsockname_args *uap, int compat)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
1997-08-16 19:16:27 +00:00
|
|
|
struct sockaddr *sa;
|
2006-07-10 21:38:17 +00:00
|
|
|
socklen_t len;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = copyin(uap->alen, &len, sizeof(len));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
error = kern_getsockname(td, uap->fdes, &sa, &len);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (len != 0) {
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
if (compat)
|
|
|
|
((struct osockaddr *)sa)->sa_family = sa->sa_family;
|
|
|
|
#endif
|
|
|
|
error = copyout(sa, uap->asa, (u_int)len);
|
|
|
|
}
|
|
|
|
free(sa, M_SONAME);
|
|
|
|
if (error == 0)
|
|
|
|
error = copyout(&len, uap->alen, sizeof(len));
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
|
|
|
|
socklen_t *alen)
|
|
|
|
{
|
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2003-12-24 18:47:43 +00:00
|
|
|
socklen_t len;
|
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(fd);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETSOCKNAME),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2004-10-24 23:45:01 +00:00
|
|
|
so = fp->f_data;
|
2006-07-10 21:38:17 +00:00
|
|
|
*sa = NULL;
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_SET(so->so_vnet);
|
2006-07-10 21:38:17 +00:00
|
|
|
error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, sa);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_RESTORE();
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto bad;
|
2006-07-10 21:38:17 +00:00
|
|
|
if (*sa == NULL)
|
1997-08-16 19:16:27 +00:00
|
|
|
len = 0;
|
2006-07-10 21:38:17 +00:00
|
|
|
else
|
|
|
|
len = MIN(*alen, (*sa)->sa_len);
|
|
|
|
*alen = len;
|
2008-02-23 01:01:49 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(*sa);
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
bad:
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0 && *sa != NULL) {
|
2006-07-10 21:38:17 +00:00
|
|
|
free(*sa, M_SONAME);
|
|
|
|
*sa = NULL;
|
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_getsockname(struct thread *td, struct getsockname_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
|
|
|
|
return (getsockname1(td, uap, 0));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
1995-10-23 15:42:12 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
1994-05-25 09:21:21 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
ogetsockname(struct thread *td, struct getsockname_args *uap)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
|
|
|
|
return (getsockname1(td, uap, 1));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
1995-10-23 15:42:12 +00:00
|
|
|
#endif /* COMPAT_OLDSOCK */
|
1994-05-24 10:09:53 +00:00
|
|
|
|
1995-10-23 15:42:12 +00:00
|
|
|
/*
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
* getpeername1() - Get name of peer for connected socket.
|
1995-10-23 15:42:12 +00:00
|
|
|
*/
|
1995-10-07 23:47:26 +00:00
|
|
|
static int
|
2016-09-09 17:40:26 +00:00
|
|
|
getpeername1(struct thread *td, struct getpeername_args *uap, int compat)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
1997-08-16 19:16:27 +00:00
|
|
|
struct sockaddr *sa;
|
2006-07-10 21:38:17 +00:00
|
|
|
socklen_t len;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
error = copyin(uap->alen, &len, sizeof (len));
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
error = kern_getpeername(td, uap->fdes, &sa, &len);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2006-07-10 21:38:17 +00:00
|
|
|
return (error);
|
|
|
|
|
|
|
|
if (len != 0) {
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
if (compat)
|
|
|
|
((struct osockaddr *)sa)->sa_family = sa->sa_family;
|
|
|
|
#endif
|
|
|
|
error = copyout(sa, uap->asa, (u_int)len);
|
|
|
|
}
|
|
|
|
free(sa, M_SONAME);
|
|
|
|
if (error == 0)
|
|
|
|
error = copyout(&len, uap->alen, sizeof(len));
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
kern_getpeername(struct thread *td, int fd, struct sockaddr **sa,
|
|
|
|
socklen_t *alen)
|
|
|
|
{
|
|
|
|
struct socket *so;
|
2004-10-24 23:45:01 +00:00
|
|
|
struct file *fp;
|
Change the cap_rights_t type from uint64_t to a structure that we can extend
in the future in a backward compatible (API and ABI) way.
The cap_rights_t represents capability rights. We used to use one bit to
represent one right, but we are running out of spare bits. Currently the new
structure provides place for 114 rights (so 50 more than the previous
cap_rights_t), but it is possible to grow the structure to hold at least 285
rights, although we can make it even larger if 285 rights won't be enough.
The structure definition looks like this:
struct cap_rights {
uint64_t cr_rights[CAP_RIGHTS_VERSION + 2];
};
The initial CAP_RIGHTS_VERSION is 0.
The top two bits in the first element of the cr_rights[] array contain total
number of elements in the array - 2. This means if those two bits are equal to
0, we have 2 array elements.
The top two bits in all remaining array elements should be 0.
The next five bits in all array elements contain array index. Only one bit is
used and bit position in this five-bits range defines array index. This means
there can be at most five array elements in the future.
To define new right the CAPRIGHT() macro must be used. The macro takes two
arguments - an array index and a bit to set, eg.
#define CAP_PDKILL CAPRIGHT(1, 0x0000000000000800ULL)
We still support aliases that combine few rights, but the rights have to belong
to the same array element, eg:
#define CAP_LOOKUP CAPRIGHT(0, 0x0000000000000400ULL)
#define CAP_FCHMOD CAPRIGHT(0, 0x0000000000002000ULL)
#define CAP_FCHMODAT (CAP_FCHMOD | CAP_LOOKUP)
There is new API to manage the new cap_rights_t structure:
cap_rights_t *cap_rights_init(cap_rights_t *rights, ...);
void cap_rights_set(cap_rights_t *rights, ...);
void cap_rights_clear(cap_rights_t *rights, ...);
bool cap_rights_is_set(const cap_rights_t *rights, ...);
bool cap_rights_is_valid(const cap_rights_t *rights);
void cap_rights_merge(cap_rights_t *dst, const cap_rights_t *src);
void cap_rights_remove(cap_rights_t *dst, const cap_rights_t *src);
bool cap_rights_contains(const cap_rights_t *big, const cap_rights_t *little);
Capability rights to the cap_rights_init(), cap_rights_set(),
cap_rights_clear() and cap_rights_is_set() functions are provided by
separating them with commas, eg:
cap_rights_t rights;
cap_rights_init(&rights, CAP_READ, CAP_WRITE, CAP_FSTAT);
There is no need to terminate the list of rights, as those functions are
actually macros that take care of the termination, eg:
#define cap_rights_set(rights, ...) \
__cap_rights_set((rights), __VA_ARGS__, 0ULL)
void __cap_rights_set(cap_rights_t *rights, ...);
Thanks to using one bit as an array index we can assert in those functions that
there are no two rights belonging to different array elements provided
together. For example this is illegal and will be detected, because CAP_LOOKUP
belongs to element 0 and CAP_PDKILL to element 1:
cap_rights_init(&rights, CAP_LOOKUP | CAP_PDKILL);
Providing several rights that belongs to the same array's element this way is
correct, but is not advised. It should only be used for aliases definition.
This commit also breaks compatibility with some existing Capsicum system calls,
but I see no other way to do that. This should be fine as Capsicum is still
experimental and this change is not going to 9.x.
Sponsored by: The FreeBSD Foundation
2013-09-05 00:09:56 +00:00
|
|
|
cap_rights_t rights;
|
2003-12-24 18:47:43 +00:00
|
|
|
socklen_t len;
|
|
|
|
int error;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2009-07-01 19:55:11 +00:00
|
|
|
AUDIT_ARG_FD(fd);
|
2015-04-11 16:00:33 +00:00
|
|
|
error = getsock_cap(td, fd, cap_rights_init(&rights, CAP_GETPEERNAME),
|
2016-09-22 09:58:46 +00:00
|
|
|
&fp, NULL, NULL);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
2007-08-06 14:26:03 +00:00
|
|
|
return (error);
|
2004-10-24 23:45:01 +00:00
|
|
|
so = fp->f_data;
|
2000-11-18 21:01:04 +00:00
|
|
|
if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
|
Giant pushdown syscalls in kern/uipc_syscalls.c. Affected calls:
recvmsg(), sendmsg(), recvfrom(), accept(), getpeername(), getsockname(),
socket(), connect(), accept(), send(), recv(), bind(), setsockopt(), listen(),
sendto(), shutdown(), socketpair(), sendfile()
2001-08-31 00:37:34 +00:00
|
|
|
error = ENOTCONN;
|
2007-08-06 14:26:03 +00:00
|
|
|
goto done;
|
2000-11-18 21:01:04 +00:00
|
|
|
}
|
2006-07-10 21:38:17 +00:00
|
|
|
*sa = NULL;
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_SET(so->so_vnet);
|
2006-07-10 21:38:17 +00:00
|
|
|
error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, sa);
|
Change the curvnet variable from a global const struct vnet *,
previously always pointing to the default vnet context, to a
dynamically changing thread-local one. The currvnet context
should be set on entry to networking code via CURVNET_SET() macros,
and reverted to previous state via CURVNET_RESTORE(). Recursions
on curvnet are permitted, though strongly discuouraged.
This change should have no functional impact on nooptions VIMAGE
kernel builds, where CURVNET_* macros expand to whitespace.
The curthread->td_vnet (aka curvnet) variable's purpose is to be an
indicator of the vnet context in which the current network-related
operation takes place, in case we cannot deduce the current vnet
context from any other source, such as by looking at mbuf's
m->m_pkthdr.rcvif->if_vnet, sockets's so->so_vnet etc. Moreover, so
far curvnet has turned out to be an invaluable consistency checking
aid: it helps to catch cases when sockets, ifnets or any other
vnet-aware structures may have leaked from one vnet to another.
The exact placement of the CURVNET_SET() / CURVNET_RESTORE() macros
was a result of an empirical iterative process, whith an aim to
reduce recursions on CURVNET_SET() to a minimum, while still reducing
the scope of CURVNET_SET() to networking only operations - the
alternative would be calling CURVNET_SET() on each system call entry.
In general, curvnet has to be set in three typicall cases: when
processing socket-related requests from userspace or from within the
kernel; when processing inbound traffic flowing from device drivers
to upper layers of the networking stack, and when executing
timer-driven networking functions.
This change also introduces a DDB subcommand to show the list of all
vnet instances.
Approved by: julian (mentor)
2009-05-05 10:56:12 +00:00
|
|
|
CURVNET_RESTORE();
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
goto bad;
|
2006-07-10 21:38:17 +00:00
|
|
|
if (*sa == NULL)
|
1997-08-16 19:16:27 +00:00
|
|
|
len = 0;
|
2006-07-10 21:38:17 +00:00
|
|
|
else
|
|
|
|
len = MIN(*alen, (*sa)->sa_len);
|
|
|
|
*alen = len;
|
2008-02-23 01:01:49 +00:00
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_STRUCT))
|
|
|
|
ktrsockaddr(*sa);
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
bad:
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0 && *sa != NULL) {
|
2006-07-10 21:38:17 +00:00
|
|
|
free(*sa, M_SONAME);
|
|
|
|
*sa = NULL;
|
|
|
|
}
|
2007-08-06 14:26:03 +00:00
|
|
|
done:
|
2004-10-24 23:45:01 +00:00
|
|
|
fdrop(fp, td);
|
1994-05-24 10:09:53 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1995-10-23 15:42:12 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
sys_getpeername(struct thread *td, struct getpeername_args *uap)
|
1995-10-23 15:42:12 +00:00
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
|
|
|
|
return (getpeername1(td, uap, 0));
|
1995-10-23 15:42:12 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef COMPAT_OLDSOCK
|
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
|
1995-10-23 15:42:12 +00:00
|
|
|
{
|
2001-09-12 08:38:13 +00:00
|
|
|
|
1995-10-23 15:42:12 +00:00
|
|
|
/* XXX uap should have type `getpeername_args *' to begin with. */
|
2001-09-12 08:38:13 +00:00
|
|
|
return (getpeername1(td, (struct getpeername_args *)uap, 1));
|
1995-10-23 15:42:12 +00:00
|
|
|
}
|
|
|
|
#endif /* COMPAT_OLDSOCK */
|
1994-10-02 17:35:40 +00:00
|
|
|
|
2016-05-18 22:05:50 +00:00
|
|
|
static int
|
|
|
|
sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2007-05-16 20:41:08 +00:00
|
|
|
struct sockaddr *sa;
|
|
|
|
struct mbuf *m;
|
1994-05-24 10:09:53 +00:00
|
|
|
int error;
|
|
|
|
|
2013-03-15 10:17:24 +00:00
|
|
|
if (buflen > MLEN) {
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef COMPAT_OLDSOCK
|
2013-03-15 10:17:24 +00:00
|
|
|
if (type == MT_SONAME && buflen <= 112)
|
1994-05-24 10:09:53 +00:00
|
|
|
buflen = MLEN; /* unix domain compat. hack */
|
|
|
|
else
|
|
|
|
#endif
|
2013-03-15 10:17:24 +00:00
|
|
|
if (buflen > MCLBYTES)
|
2004-06-07 09:59:50 +00:00
|
|
|
return (EINVAL);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2013-03-15 10:17:24 +00:00
|
|
|
m = m_get2(buflen, M_WAITOK, type, 0);
|
1994-05-24 10:09:53 +00:00
|
|
|
m->m_len = buflen;
|
2016-05-18 22:05:50 +00:00
|
|
|
error = copyin(buf, mtod(m, void *), buflen);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0)
|
1994-05-24 10:09:53 +00:00
|
|
|
(void) m_free(m);
|
|
|
|
else {
|
|
|
|
*mp = m;
|
|
|
|
if (type == MT_SONAME) {
|
|
|
|
sa = mtod(m, struct sockaddr *);
|
|
|
|
|
|
|
|
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
|
|
|
|
if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
|
|
|
|
sa->sa_family = sa->sa_len;
|
|
|
|
#endif
|
|
|
|
sa->sa_len = buflen;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
1997-08-16 19:16:27 +00:00
|
|
|
int
|
2016-09-09 17:40:26 +00:00
|
|
|
getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
|
1997-08-16 19:16:27 +00:00
|
|
|
{
|
|
|
|
struct sockaddr *sa;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
if (len > SOCK_MAXADDRLEN)
|
2004-01-10 13:03:43 +00:00
|
|
|
return (ENAMETOOLONG);
|
2004-01-10 08:28:54 +00:00
|
|
|
if (len < offsetof(struct sockaddr, sa_data[0]))
|
2004-01-10 17:14:53 +00:00
|
|
|
return (EINVAL);
|
2008-10-23 15:53:51 +00:00
|
|
|
sa = malloc(len, M_SONAME, M_WAITOK);
|
1997-08-16 19:16:27 +00:00
|
|
|
error = copyin(uaddr, sa, len);
|
2013-09-05 00:17:38 +00:00
|
|
|
if (error != 0) {
|
2008-10-23 15:53:51 +00:00
|
|
|
free(sa, M_SONAME);
|
1997-08-16 19:16:27 +00:00
|
|
|
} else {
|
|
|
|
#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
|
|
|
|
if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
|
|
|
|
sa->sa_family = sa->sa_len;
|
|
|
|
#endif
|
|
|
|
sa->sa_len = len;
|
|
|
|
*namp = sa;
|
|
|
|
}
|
2004-01-10 13:03:43 +00:00
|
|
|
return (error);
|
1997-08-16 19:16:27 +00:00
|
|
|
}
|