Extend mmap/mprotect API to specify the max page protections.

A new macro PROT_MAX() shifts a protection value so it can be OR'd with
a regular protection value in the prot argument.  If present, the
shifted flags specify the maximum permissions of the mapping.

While these flags are non-portable, they can be used in portable code
with simple ifdefs to expand PROT_MAX() to 0.

This change allows (e.g.) a region that must be writable during run-time
linking or JIT code generation to be made permanently read+execute after
writes are complete.  This complements W^X protections allowing more
precise control by the programmer.

This change alters mprotect argument checking and returns an error when
unhandled protection flags are set.  This differs from POSIX (in that
POSIX only specifies an error), but is the documented behavior on Linux
and more closely matches historical mmap behavior.

In addition to explicit setting of the maximum permissions, an
experimental sysctl vm.imply_prot_max causes mmap to assume that the
initial permissions requested should be the maximum when the sysctl is
set to 1.  PROT_NONE mappings are excluded from this for compatibility
with rtld and other consumers that use such mappings to reserve
address space before mapping contents into part of the reservation.  A
final version of this is expected to provide per-binary and per-process
opt-in/out options and this sysctl will go away in its current form.
As such it is undocumented.

Reviewed by:	emaste, kib (prior version), markj
Additional suggestions from:	alc
Obtained from:	CheriBSD
Sponsored by:	DARPA, AFRL
Differential Revision:	https://reviews.freebsd.org/D18880
This commit is contained in:
Brooks Davis 2019-06-20 18:24:16 +00:00
parent 0cf197862d
commit 74a1b66cf4
4 changed files with 95 additions and 8 deletions

View File

@ -28,7 +28,7 @@
.\" @(#)mmap.2 8.4 (Berkeley) 5/11/95
.\" $FreeBSD$
.\"
.Dd June 22, 2017
.Dd June 20, 2019
.Dt MMAP 2
.Os
.Sh NAME
@ -113,6 +113,22 @@ Pages may be written.
Pages may be executed.
.El
.Pp
In addition to these protection flags,
.Fx
provides the ability to set the maximum protection of a region allocated by
.Nm
and later altered by
.Xr mprotect 2 .
This is accomplished by
.Em or Ns 'ing
one or more
.Dv PROT_
values wrapped in the
.Dv PROT_MAX()
macro into the
.Fa prot
argument.
.Pp
The
.Fa flags
argument specifies the type of the mapped object, mapping options and
@ -416,6 +432,11 @@ An invalid value was passed in the
.Fa prot
argument.
.It Bq Er EINVAL
The
.Fa prot
argument contains permissions which are not a subset of the specified
maximum permissions.
.It Bq Er EINVAL
An undefined option was set in the
.Fa flags
argument.

View File

@ -28,7 +28,7 @@
.\" @(#)mprotect.2 8.1 (Berkeley) 6/9/93
.\" $FreeBSD$
.\"
.Dd August 3, 2016
.Dd June 20, 2019
.Dt MPROTECT 2
.Os
.Sh NAME
@ -65,6 +65,22 @@ The pages can be written.
.It Dv PROT_EXEC
The pages can be executed.
.El
.Pp
In addition to these protection flags,
.Fx
provides the ability to set the maximum protection of a region
(which prevents
.Nm
from upgrading the permissions).
This is accomplished by
.Em or Ns 'ing
one or more
.Dv PROT_
values wrapped in the
.Dv PROT_MAX()
macro into the
.Fa prot
argument.
.Sh RETURN VALUES
.Rv -std mprotect
.Sh ERRORS
@ -78,6 +94,15 @@ The virtual address range specified by the
and
.Fa len
arguments is not valid.
.It Bq Er EINVAL
The
.Fa prot
argument contains unhandled bits.
.It Bq Er EINVAL
The
.Fa prot
argument contains permissions which are not a subset of the specified
maximum permissions.
.It Bq Er EACCES
The calling process was not allowed to change
the protection to the value specified by

View File

@ -55,6 +55,14 @@
#define PROT_READ 0x01 /* pages can be read */
#define PROT_WRITE 0x02 /* pages can be written */
#define PROT_EXEC 0x04 /* pages can be executed */
#if __BSD_VISIBLE
/* Mask covering all of the basic protection bits. */
#define _PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
/* Extract the basic (requested) protection bits from a prot argument. */
#define PROT_EXTRACT(prot) ((prot) & _PROT_ALL)
/* Bit offset at which maximum protections are stored in a prot argument. */
#define _PROT_MAX_SHIFT 16
/* Shift PROT_* bits into the maximum-protection field, ready to be OR'd in. */
#define PROT_MAX(prot) ((prot) << _PROT_MAX_SHIFT)
/* Extract the maximum-protection field as PROT_* bits; 0 if none was given. */
#define PROT_MAX_EXTRACT(prot) (((prot) >> _PROT_MAX_SHIFT) & _PROT_ALL)
#endif
/*
* Flags contain sharing type and options.

View File

@ -103,6 +103,9 @@ SYSCTL_INT(_vm, OID_AUTO, old_mlock, CTLFLAG_RWTUN, &old_mlock, 0,
static int mincore_mapped = 1;
SYSCTL_INT(_vm, OID_AUTO, mincore_mapped, CTLFLAG_RWTUN, &mincore_mapped, 0,
"mincore reports mappings, not residency");
/*
 * When non-zero, kern_mmap() uses the requested protections as the
 * maximum protections for requests that carry no PROT_MAX() bits and
 * whose requested protection is not PROT_NONE.
 */
static int imply_prot_max = 0;
SYSCTL_INT(_vm, OID_AUTO, imply_prot_max, CTLFLAG_RWTUN, &imply_prot_max, 0,
"Imply maximum page permissions in mmap() when none are specified");
#ifdef MAP_32BIT
#define MAP_32BIT_MAX_ADDR ((vm_offset_t)1 << 31)
@ -187,9 +190,25 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
vm_offset_t addr;
vm_size_t pageoff, size;
vm_prot_t cap_maxprot;
int align, error;
int align, error, max_prot;
cap_rights_t rights;
if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
return (EINVAL);
max_prot = PROT_MAX_EXTRACT(prot);
prot = PROT_EXTRACT(prot);
if (max_prot != 0 && (max_prot & prot) != prot)
return (EINVAL);
/*
* Always honor PROT_MAX if set. If not, default to all
* permissions unless we're implying maximum permissions.
*
* XXX: should be tunable per process and ABI.
*/
if (max_prot == 0)
max_prot = (imply_prot_max && prot != PROT_NONE) ?
prot : _PROT_ALL;
vms = td->td_proc->p_vmspace;
fp = NULL;
AUDIT_ARG_FD(fd);
@ -335,7 +354,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
* This relies on VM_PROT_* matching PROT_*.
*/
error = vm_mmap_object(&vms->vm_map, &addr, size, prot,
VM_PROT_ALL, flags, NULL, pos, FALSE, td);
max_prot, flags, NULL, pos, FALSE, td);
} else {
/*
* Mapping file, get fp for validation and don't let the
@ -363,7 +382,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t len, int prot, int flags,
/* This relies on VM_PROT_* matching PROT_*. */
error = fo_mmap(fp, &vms->vm_map, &addr, size, prot,
cap_maxprot, flags, pos, td);
max_prot & cap_maxprot, flags, pos, td);
}
if (error == 0)
@ -594,9 +613,13 @@ kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
{
vm_offset_t addr;
vm_size_t pageoff;
int vm_error, max_prot;
addr = addr0;
prot = (prot & VM_PROT_ALL);
if ((prot & ~(_PROT_ALL | PROT_MAX(_PROT_ALL))) != 0)
return (EINVAL);
max_prot = PROT_MAX_EXTRACT(prot);
prot = PROT_EXTRACT(prot);
pageoff = (addr & PAGE_MASK);
addr -= pageoff;
size += pageoff;
@ -610,8 +633,18 @@ kern_mprotect(struct thread *td, uintptr_t addr0, size_t size, int prot)
if (addr + size < addr)
return (EINVAL);
switch (vm_map_protect(&td->td_proc->p_vmspace->vm_map, addr,
addr + size, prot, FALSE)) {
vm_error = KERN_SUCCESS;
if (max_prot != 0) {
if ((max_prot & prot) != prot)
return (EINVAL);
vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
addr, addr + size, max_prot, TRUE);
}
if (vm_error == KERN_SUCCESS)
vm_error = vm_map_protect(&td->td_proc->p_vmspace->vm_map,
addr, addr + size, prot, FALSE);
switch (vm_error) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE: