Implement address space guards.

A guard, requested by the MAP_GUARD mmap(2) flag, prevents the reuse
of the allocated address space and does not allow instantiation of
the pages in the range.  It provides more explicit support for the
usual two-stage reserve-then-commit allocators, since it prevents
accidental instantiation of the mapping, e.g. by mprotect(2).

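For illustration (not part of this commit), a minimal userland sketch
of the reserve-then-commit pattern described above; it assumes a
FreeBSD 12 system where MAP_GUARD is available, and the sizes are
arbitrary:

#include <sys/mman.h>
#include <err.h>
#include <stddef.h>

int
main(void)
{
	size_t resv = 1024 * 1024 * 1024;	/* reserve 1G of address space */
	size_t commit = 64 * 1024;		/* commit the first 64K */
	char *base, *p;

	/*
	 * Stage 1: reserve.  The range cannot be reused by other
	 * mappings, and any access to it faults.
	 */
	base = mmap(NULL, resv, PROT_NONE, MAP_GUARD, -1, 0);
	if (base == MAP_FAILED)
		err(1, "mmap(MAP_GUARD)");

	/*
	 * Stage 2: commit.  MAP_FIXED is required to replace the
	 * guard, so an accidental non-fixed mmap(2) cannot land in
	 * the reservation.
	 */
	p = mmap(base, commit, PROT_READ | PROT_WRITE,
	    MAP_FIXED | MAP_ANON | MAP_PRIVATE, -1, 0);
	if (p == MAP_FAILED)
		err(1, "mmap(MAP_FIXED)");
	p[0] = 1;			/* committed memory is usable */

	/* munmap(2) destroys guards as well as mappings. */
	if (munmap(base, resv) == -1)
		err(1, "munmap");
	return (0);
}
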
Use guards to reimplement the stack grow code.  Explicitly track the
stack grow area with the guard, including the stack guard page.  On
stack grow, a trivial shift of the limits of the guard map entry and
the stack map entry performs the expansion.  Move the code that
detects stack growth and calls vm_map_growstack() from vm_fault()
into vm_map_lookup().

As a result, it is impossible for a random mapping to appear in the
stack grow area, or to overlap the stack guard page.

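A hypothetical userland check of this property (again, not part of the
commit): a non-fixed mmap(2) request hinted into the guarded grow area
of a MAP_STACK mapping must be placed elsewhere.

#include <sys/mman.h>
#include <assert.h>
#include <stdio.h>

int
main(void)
{
	size_t sz = 8 * 1024 * 1024;	/* arbitrary stack reservation */
	char *stk, *hint, *p;

	stk = mmap(NULL, sz, PROT_READ | PROT_WRITE,
	    MAP_STACK | MAP_ANON | MAP_PRIVATE, -1, 0);
	assert(stk != MAP_FAILED);

	/* Hint at a page inside the not-yet-grown, guarded area. */
	hint = stk + 4096;
	p = mmap(hint, 4096, PROT_READ | PROT_WRITE,
	    MAP_ANON | MAP_PRIVATE, -1, 0);
	assert(p == MAP_FAILED || p != hint);
	printf("hint %p was placed at %p\n", (void *)hint, (void *)p);
	return (0);
}
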
Enable the stack guard page by default.

Reviewed by:	alc, markj
Man page update reviewed by:	alc, bjk, emaste, markj, pho
Tested by:	pho, Qualys
Sponsored by:	The FreeBSD Foundation
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D11306 (man pages)
Author: Konstantin Belousov
Date: 2017-06-24 17:01:11 +00:00
commit 19bd0d9c85 (parent 546bb2d7f0)
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=320317
9 changed files with 257 additions and 233 deletions

lib/libc/sys/mmap.2

@@ -199,6 +199,21 @@ In contrast, if
.Dv MAP_EXCL
is specified, the request will fail if a mapping
already exists within the range.
.It Dv MAP_GUARD
Instead of a mapping, create a guard of the specified size.
Guards allow a process to create reservations in its address space,
which can later be replaced by actual mappings.
.Pp
.Fn mmap
will not create mappings in the address range of a guard unless
the request specifies
.Dv MAP_FIXED .
Guards can be destroyed with
.Xr munmap 2 .
Any memory access by a thread to the guarded range results
in the delivery of a
.Dv SIGSEGV
signal to that thread.
.It Dv MAP_NOCORE
Region is not included in a core file.
.It Dv MAP_NOSYNC
@@ -303,6 +318,7 @@ must include at least
.Dv PROT_READ
and
.Dv PROT_WRITE .
.Pp
This option creates
a memory region that grows to at most
.Fa len
@@ -313,6 +329,10 @@ stack top is the starting address returned by the call, plus
bytes.
The bottom of the stack at maximum growth is the starting
address returned by the call.
The system uses guards to prevent the inadvertent use of
regions into which stacks created with
.Dv MAP_STACK
will automatically grow, without mapping the whole stack in advance.
.El
.Pp
The
@@ -406,6 +426,7 @@ were specified.
.It Bq Er EINVAL
None of
.Dv MAP_ANON ,
.Dv MAP_GUARD ,
.Dv MAP_PRIVATE ,
.Dv MAP_SHARED ,
or
@@ -455,6 +476,25 @@ were specified, but the requested region is already used by a mapping.
was specified, but
.Dv MAP_FIXED
was not.
.It Bq Er EINVAL
.Dv MAP_GUARD
was specified, but the
.Fa offset
argument was not zero, the
.Fa fd
argument was not -1, or the
.Fa prot
argument was not
.Dv PROT_NONE .
.It Bq Er EINVAL
.Dv MAP_GUARD
was specified together with one of the flags
.Dv MAP_ANON ,
.Dv MAP_PREFAULT ,
.Dv MAP_PREFAULT_READ ,
.Dv MAP_PRIVATE ,
.Dv MAP_SHARED ,
or
.Dv MAP_STACK .
.It Bq Er ENODEV
.Dv MAP_ANON
has not been specified and

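The SIGSEGV behavior documented above can be demonstrated with a short
hypothetical test (assuming FreeBSD 12 or later; error handling kept
minimal):

#include <sys/mman.h>
#include <err.h>
#include <setjmp.h>
#include <signal.h>
#include <stdio.h>
#include <string.h>

static sigjmp_buf env;

static void
on_segv(int sig)
{
	(void)sig;
	siglongjmp(env, 1);	/* restores the signal mask, too */
}

int
main(void)
{
	struct sigaction sa;
	volatile char *g;

	memset(&sa, 0, sizeof(sa));
	sa.sa_handler = on_segv;
	if (sigaction(SIGSEGV, &sa, NULL) == -1)
		err(1, "sigaction");
	g = mmap(NULL, 4096, PROT_NONE, MAP_GUARD, -1, 0);
	if (g == MAP_FAILED)
		err(1, "mmap");
	if (sigsetjmp(env, 1) == 0)
		(void)g[0];	/* guards never instantiate pages; this faults */
	else
		printf("SIGSEGV delivered for the guard access\n");
	return (0);
}
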
lib/libc/sys/munmap.2

@@ -28,7 +28,7 @@
.\" @(#)munmap.2 8.3 (Berkeley) 5/27/94
.\" $FreeBSD$
.\"
.Dd May 27, 1994
.Dd June 22, 2017
.Dt MUNMAP 2
.Os
.Sh NAME
@@ -44,7 +44,7 @@
The
.Fn munmap
system call
deletes the mappings for the specified address range,
deletes the mappings and guards for the specified address range,
and causes further references to addresses within the range
to generate invalid memory references.
.Sh RETURN VALUES

sys/sys/mman.h

@@ -90,6 +90,7 @@
/*
* Extended flags
*/
#define MAP_GUARD 0x00002000 /* reserve but don't map address range */
#define MAP_EXCL 0x00004000 /* for MAP_FIXED, fail if address is used */
#define MAP_NOCORE 0x00020000 /* dont include these pages in a coredump */
#define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */

sys/sys/param.h

@@ -58,7 +58,7 @@
* in the range 5 to 9.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1200034 /* Master, propagated to newvers */
#define __FreeBSD_version 1200035 /* Master, propagated to newvers */
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
@@ -76,12 +76,13 @@
#undef __FreeBSD_kernel__
#define __FreeBSD_kernel__
#ifdef _KERNEL
#if defined(_KERNEL) || defined(IN_RTLD)
#define P_OSREL_SIGWAIT 700000
#define P_OSREL_SIGSEGV 700004
#define P_OSREL_MAP_ANON 800104
#define P_OSREL_MAP_FSTRICT 1100036
#define P_OSREL_SHUTDOWN_ENOTCONN 1100077
#define P_OSREL_MAP_GUARD 1200035
#define P_OSREL_MAJOR(x) ((x) / 100000)
#endif

sys/vm/vm.h

@@ -78,6 +78,7 @@ typedef u_char vm_prot_t; /* protection codes */
#define VM_PROT_WRITE ((vm_prot_t) 0x02)
#define VM_PROT_EXECUTE ((vm_prot_t) 0x04)
#define VM_PROT_COPY ((vm_prot_t) 0x08) /* copy-on-read */
#define VM_PROT_FAULT_LOOKUP ((vm_prot_t) 0x010)
#define VM_PROT_ALL (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE)
#define VM_PROT_RW (VM_PROT_READ|VM_PROT_WRITE)

sys/vm/vm_fault.c

@@ -495,13 +495,12 @@ vm_fault_hold(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
int locked, nera, result, rv;
u_char behavior;
boolean_t wired; /* Passed by reference. */
bool dead, growstack, hardfault, is_first_object_locked;
bool dead, hardfault, is_first_object_locked;
VM_CNT_INC(v_vm_faults);
fs.vp = NULL;
faultcount = 0;
nera = -1;
growstack = true;
hardfault = false;
RetryFault:;
@@ -511,17 +510,10 @@ RetryFault:;
* search.
*/
fs.map = map;
result = vm_map_lookup(&fs.map, vaddr, fault_type, &fs.entry,
&fs.first_object, &fs.first_pindex, &prot, &wired);
result = vm_map_lookup(&fs.map, vaddr, fault_type |
VM_PROT_FAULT_LOOKUP, &fs.entry, &fs.first_object,
&fs.first_pindex, &prot, &wired);
if (result != KERN_SUCCESS) {
if (growstack && result == KERN_INVALID_ADDRESS &&
map != kernel_map) {
result = vm_map_growstack(curproc, vaddr);
if (result != KERN_SUCCESS)
return (KERN_FAILURE);
growstack = false;
goto RetryFault;
}
unlock_vp(&fs);
return (result);
}
@@ -547,6 +539,8 @@ RetryFault:;
goto RetryFault;
}
MPASS((fs.entry->eflags & MAP_ENTRY_GUARD) == 0);
if (wired)
fault_type = prot | (fault_type & VM_PROT_COPY);
else

sys/vm/vm_map.c

@@ -133,6 +133,8 @@ static void _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min,
static void vm_map_entry_deallocate(vm_map_entry_t entry, boolean_t system_map);
static void vm_map_entry_dispose(vm_map_t map, vm_map_entry_t entry);
static void vm_map_entry_unwire(vm_map_t map, vm_map_entry_t entry);
static int vm_map_growstack(vm_map_t map, vm_offset_t addr,
vm_map_entry_t gap_entry);
static void vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
vm_object_t object, vm_pindex_t pindex, vm_size_t size, int flags);
#ifdef INVARIANTS
@@ -1214,6 +1216,10 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
if (prev_entry->next != &map->header && prev_entry->next->start < end)
return (KERN_NO_SPACE);
if ((cow & MAP_CREATE_GUARD) != 0 && (object != NULL ||
max != VM_PROT_NONE))
return (KERN_INVALID_ARGUMENT);
protoeflags = 0;
if (cow & MAP_COPY_ON_WRITE)
protoeflags |= MAP_ENTRY_COW | MAP_ENTRY_NEEDS_COPY;
@@ -1229,13 +1235,19 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
protoeflags |= MAP_ENTRY_GROWS_UP;
if (cow & MAP_VN_WRITECOUNT)
protoeflags |= MAP_ENTRY_VN_WRITECNT;
if ((cow & MAP_CREATE_GUARD) != 0)
protoeflags |= MAP_ENTRY_GUARD;
if ((cow & MAP_CREATE_STACK_GAP_DN) != 0)
protoeflags |= MAP_ENTRY_STACK_GAP_DN;
if ((cow & MAP_CREATE_STACK_GAP_UP) != 0)
protoeflags |= MAP_ENTRY_STACK_GAP_UP;
if (cow & MAP_INHERIT_SHARE)
inheritance = VM_INHERIT_SHARE;
else
inheritance = VM_INHERIT_DEFAULT;
cred = NULL;
if (cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT))
if ((cow & (MAP_ACC_NO_CHARGE | MAP_NOFAULT | MAP_CREATE_GUARD)) != 0)
goto charged;
if ((cow & MAP_ACC_CHARGED) || ((prot & VM_PROT_WRITE) &&
((protoeflags & MAP_ENTRY_NEEDS_COPY) || object == NULL))) {
@@ -1284,7 +1296,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
if (prev_entry->inheritance == inheritance &&
prev_entry->protection == prot &&
prev_entry->max_protection == max) {
map->size += end - prev_entry->end;
if ((prev_entry->eflags & MAP_ENTRY_GUARD) == 0)
map->size += end - prev_entry->end;
prev_entry->end = end;
vm_map_entry_resize_free(map, prev_entry);
vm_map_simplify_entry(map, prev_entry);
@@ -1321,7 +1334,6 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
new_entry->eflags = protoeflags;
new_entry->object.vm_object = object;
new_entry->offset = offset;
new_entry->avail_ssize = 0;
new_entry->inheritance = inheritance;
new_entry->protection = prot;
@@ -1339,7 +1351,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
* Insert the new entry into the list
*/
vm_map_entry_link(map, prev_entry, new_entry);
map->size += new_entry->end - new_entry->start;
if ((new_entry->eflags & MAP_ENTRY_GUARD) == 0)
map->size += new_entry->end - new_entry->start;
/*
* Try to coalesce the new entry with both the previous and next
@@ -1674,7 +1687,8 @@ _vm_map_clip_start(vm_map_t map, vm_map_entry_t entry, vm_offset_t start)
* map. This is a bit of a hack, but is also about the best place to
* put this improvement.
*/
if (entry->object.vm_object == NULL && !map->system_map) {
if (entry->object.vm_object == NULL && !map->system_map &&
(entry->eflags & MAP_ENTRY_GUARD) == 0) {
vm_object_t object;
object = vm_object_allocate(OBJT_DEFAULT,
atop(entry->end - entry->start));
@@ -1753,7 +1767,8 @@ _vm_map_clip_end(vm_map_t map, vm_map_entry_t entry, vm_offset_t end)
* map. This is a bit of a hack, but is also about the best place to
* put this improvement.
*/
if (entry->object.vm_object == NULL && !map->system_map) {
if (entry->object.vm_object == NULL && !map->system_map &&
(entry->eflags & MAP_ENTRY_GUARD) == 0) {
vm_object_t object;
object = vm_object_allocate(OBJT_DEFAULT,
atop(entry->end - entry->start));
@@ -2010,7 +2025,8 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
if (set_max ||
((new_prot & ~(current->protection)) & VM_PROT_WRITE) == 0 ||
ENTRY_CHARGED(current)) {
ENTRY_CHARGED(current) ||
(current->eflags & MAP_ENTRY_GUARD) != 0) {
continue;
}
@@ -2059,6 +2075,9 @@ vm_map_protect(vm_map_t map, vm_offset_t start, vm_offset_t end,
*/
for (current = entry; current != &map->header && current->start < end;
current = current->next) {
if ((current->eflags & MAP_ENTRY_GUARD) != 0)
continue;
old_prot = current->protection;
if (set_max)
@@ -2312,7 +2331,9 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end,
entry = temp_entry->next;
while ((entry != &map->header) && (entry->start < end)) {
vm_map_clip_end(map, entry, end);
entry->inheritance = new_inheritance;
if ((entry->eflags & MAP_ENTRY_GUARD) == 0 ||
new_inheritance != VM_INHERIT_ZERO)
entry->inheritance = new_inheritance;
vm_map_simplify_entry(map, entry);
entry = entry->next;
}
@@ -2918,6 +2939,15 @@ vm_map_entry_delete(vm_map_t map, vm_map_entry_t entry)
vm_map_entry_unlink(map, entry);
object = entry->object.vm_object;
if ((entry->eflags & MAP_ENTRY_GUARD) != 0) {
MPASS(entry->cred == NULL);
MPASS((entry->eflags & MAP_ENTRY_IS_SUB_MAP) == 0);
MPASS(object == NULL);
vm_map_entry_deallocate(entry, map->system_map);
return;
}
size = entry->end - entry->start;
map->size -= size;
@@ -3276,6 +3306,8 @@ vmspace_map_entry_forked(const struct vmspace *vm1, struct vmspace *vm2,
vm_size_t entrysize;
vm_offset_t newend;
if ((entry->eflags & MAP_ENTRY_GUARD) != 0)
return;
entrysize = entry->end - entry->start;
vm2->vm_map.size += entrysize;
if (entry->eflags & (MAP_ENTRY_GROWS_DOWN | MAP_ENTRY_GROWS_UP)) {
@@ -3312,6 +3344,7 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
vm_map_entry_t new_entry, old_entry;
vm_object_t object;
int locked;
vm_inherit_t inh;
old_map = &vm1->vm_map;
/* Copy immutable fields of vm1 to vm2. */
@@ -3334,7 +3367,12 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
if (old_entry->eflags & MAP_ENTRY_IS_SUB_MAP)
panic("vm_map_fork: encountered a submap");
switch (old_entry->inheritance) {
inh = old_entry->inheritance;
if ((old_entry->eflags & MAP_ENTRY_GUARD) != 0 &&
inh != VM_INHERIT_NONE)
inh = VM_INHERIT_COPY;
switch (inh) {
case VM_INHERIT_NONE:
break;
@@ -3467,7 +3505,6 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge)
new_entry->start = old_entry->start;
new_entry->end = old_entry->end;
new_entry->avail_ssize = old_entry->avail_ssize;
new_entry->eflags = old_entry->eflags &
~(MAP_ENTRY_USER_WIRED | MAP_ENTRY_IN_TRANSITION |
MAP_ENTRY_VN_WRITECNT);
@@ -3535,7 +3572,7 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
vm_size_t growsize, vm_prot_t prot, vm_prot_t max, int cow)
{
vm_map_entry_t new_entry, prev_entry;
vm_offset_t bot, top;
vm_offset_t bot, gap_bot, gap_top, top;
vm_size_t init_ssize;
int orient, rv;
@@ -3543,10 +3580,11 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
* The stack orientation is piggybacked with the cow argument.
* Extract it into orient and mask the cow argument so that we
* don't pass it around further.
* NOTE: We explicitly allow bi-directional stacks.
*/
orient = cow & (MAP_STACK_GROWS_DOWN|MAP_STACK_GROWS_UP);
orient = cow & (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP);
KASSERT(orient != 0, ("No stack grow direction"));
KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP),
("bi-dir stack"));
if (addrbos < vm_map_min(map) ||
addrbos > vm_map_max(map) ||
@@ -3582,57 +3620,58 @@
* and cow to be 0. Possibly we should eliminate these as input
* parameters, and just pass these values here in the insert call.
*/
if (orient == MAP_STACK_GROWS_DOWN)
if (orient == MAP_STACK_GROWS_DOWN) {
bot = addrbos + max_ssize - init_ssize;
else if (orient == MAP_STACK_GROWS_UP)
top = bot + init_ssize;
gap_bot = addrbos;
gap_top = bot;
} else /* if (orient == MAP_STACK_GROWS_UP) */ {
bot = addrbos;
else
bot = round_page(addrbos + max_ssize/2 - init_ssize/2);
top = bot + init_ssize;
rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
/* Now set the avail_ssize amount. */
if (rv == KERN_SUCCESS) {
new_entry = prev_entry->next;
if (new_entry->end != top || new_entry->start != bot)
panic("Bad entry start/end for new stack entry");
new_entry->avail_ssize = max_ssize - init_ssize;
KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
(new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
("new entry lacks MAP_ENTRY_GROWS_DOWN"));
KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
(new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
("new entry lacks MAP_ENTRY_GROWS_UP"));
top = bot + init_ssize;
gap_bot = top;
gap_top = addrbos + max_ssize;
}
rv = vm_map_insert(map, NULL, 0, bot, top, prot, max, cow);
if (rv != KERN_SUCCESS)
return (rv);
new_entry = prev_entry->next;
KASSERT(new_entry->end == top || new_entry->start == bot,
("Bad entry start/end for new stack entry"));
KASSERT((orient & MAP_STACK_GROWS_DOWN) == 0 ||
(new_entry->eflags & MAP_ENTRY_GROWS_DOWN) != 0,
("new entry lacks MAP_ENTRY_GROWS_DOWN"));
KASSERT((orient & MAP_STACK_GROWS_UP) == 0 ||
(new_entry->eflags & MAP_ENTRY_GROWS_UP) != 0,
("new entry lacks MAP_ENTRY_GROWS_UP"));
rv = vm_map_insert(map, NULL, 0, gap_bot, gap_top, VM_PROT_NONE,
VM_PROT_NONE, MAP_CREATE_GUARD | (orient == MAP_STACK_GROWS_DOWN ?
MAP_CREATE_STACK_GAP_DN : MAP_CREATE_STACK_GAP_UP));
if (rv != KERN_SUCCESS)
(void)vm_map_delete(map, bot, top);
return (rv);
}
static int stack_guard_page = 0;
static int stack_guard_page = 1;
SYSCTL_INT(_security_bsd, OID_AUTO, stack_guard_page, CTLFLAG_RWTUN,
&stack_guard_page, 0,
"Insert stack guard page ahead of the growable segments.");
"Specifies the number of guard pages for a stack that grows.");
/* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if the
* desired address is already mapped, or if we successfully grow
* the stack. Also returns KERN_SUCCESS if addr is outside the
* stack range (this is strange, but preserves compatibility with
* the grow function in vm_machdep.c).
/*
* Attempts to grow a vm stack entry. Returns KERN_SUCCESS if we
* successfully grow the stack.
*/
int
vm_map_growstack(struct proc *p, vm_offset_t addr)
static int
vm_map_growstack(vm_map_t map, vm_offset_t addr, vm_map_entry_t gap_entry)
{
vm_map_entry_t next_entry, prev_entry;
vm_map_entry_t new_entry, stack_entry;
struct vmspace *vm = p->p_vmspace;
vm_map_t map = &vm->vm_map;
vm_offset_t end;
vm_size_t growsize;
vm_map_entry_t stack_entry;
struct proc *p;
struct vmspace *vm;
struct ucred *cred;
vm_offset_t gap_end, gap_start, grow_start;
size_t grow_amount, max_grow;
rlim_t lmemlim, stacklim, vmemlim;
int is_procstack, rv;
struct ucred *cred;
int rv, rv1;
bool gap_deleted, grow_down, is_procstack;
#ifdef notyet
uint64_t limit;
#endif
@@ -3640,125 +3679,71 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
int error;
#endif
p = curproc;
vm = p->p_vmspace;
MPASS(map == &p->p_vmspace->vm_map);
MPASS(!map->system_map);
lmemlim = lim_cur(curthread, RLIMIT_MEMLOCK);
stacklim = lim_cur(curthread, RLIMIT_STACK);
vmemlim = lim_cur(curthread, RLIMIT_VMEM);
Retry:
vm_map_lock_read(map);
/* If addr is already in the entry range, no need to grow.*/
if (vm_map_lookup_entry(map, addr, &prev_entry)) {
vm_map_unlock_read(map);
retry:
/* If addr is not in a hole for a stack grow area, no need to grow. */
if (gap_entry == NULL && !vm_map_lookup_entry(map, addr, &gap_entry))
return (KERN_FAILURE);
if ((gap_entry->eflags & MAP_ENTRY_GUARD) == 0)
return (KERN_SUCCESS);
}
next_entry = prev_entry->next;
if (!(prev_entry->eflags & MAP_ENTRY_GROWS_UP)) {
/*
* This entry does not grow upwards. Since the address lies
* beyond this entry, the next entry (if one exists) has to
* be a downward growable entry. The entry list header is
* never a growable entry, so it suffices to check the flags.
*/
if (!(next_entry->eflags & MAP_ENTRY_GROWS_DOWN)) {
vm_map_unlock_read(map);
return (KERN_SUCCESS);
}
stack_entry = next_entry;
if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_DN) != 0) {
stack_entry = gap_entry->next;
if ((stack_entry->eflags & MAP_ENTRY_GROWS_DOWN) == 0 ||
stack_entry->start != gap_entry->end)
return (KERN_FAILURE);
grow_amount = round_page(stack_entry->start - addr);
grow_down = true;
} else if ((gap_entry->eflags & MAP_ENTRY_STACK_GAP_UP) != 0) {
stack_entry = gap_entry->prev;
if ((stack_entry->eflags & MAP_ENTRY_GROWS_UP) == 0 ||
stack_entry->end != gap_entry->start)
return (KERN_FAILURE);
grow_amount = round_page(addr + 1 - stack_entry->end);
grow_down = false;
} else {
/*
* This entry grows upward. If the next entry does not at
* least grow downwards, this is the entry we need to grow.
* otherwise we have two possible choices and we have to
* select one.
*/
if (next_entry->eflags & MAP_ENTRY_GROWS_DOWN) {
/*
* We have two choices; grow the entry closest to
* the address to minimize the amount of growth.
*/
if (addr - prev_entry->end <= next_entry->start - addr)
stack_entry = prev_entry;
else
stack_entry = next_entry;
} else
stack_entry = prev_entry;
return (KERN_FAILURE);
}
if (stack_entry == next_entry) {
KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_DOWN, ("foo"));
KASSERT(addr < stack_entry->start, ("foo"));
end = (prev_entry != &map->header) ? prev_entry->end :
stack_entry->start - stack_entry->avail_ssize;
grow_amount = roundup(stack_entry->start - addr, PAGE_SIZE);
max_grow = stack_entry->start - end;
} else {
KASSERT(stack_entry->eflags & MAP_ENTRY_GROWS_UP, ("foo"));
KASSERT(addr >= stack_entry->end, ("foo"));
end = (next_entry != &map->header) ? next_entry->start :
stack_entry->end + stack_entry->avail_ssize;
grow_amount = roundup(addr + 1 - stack_entry->end, PAGE_SIZE);
max_grow = end - stack_entry->end;
}
if (grow_amount > stack_entry->avail_ssize) {
vm_map_unlock_read(map);
max_grow = gap_entry->end - gap_entry->start - stack_guard_page *
PAGE_SIZE;
if (grow_amount > max_grow)
return (KERN_NO_SPACE);
}
/*
* If there is no longer enough space between the entries nogo, and
* adjust the available space. Note: this should only happen if the
* user has mapped into the stack area after the stack was created,
* and is probably an error.
*
* This also effectively destroys any guard page the user might have
* intended by limiting the stack size.
*/
if (grow_amount + (stack_guard_page ? PAGE_SIZE : 0) > max_grow) {
if (vm_map_lock_upgrade(map))
goto Retry;
stack_entry->avail_ssize = max_grow;
vm_map_unlock(map);
return (KERN_NO_SPACE);
}
is_procstack = (addr >= (vm_offset_t)vm->vm_maxsaddr &&
addr < (vm_offset_t)p->p_sysent->sv_usrstack) ? 1 : 0;
/*
* If this is the main process stack, see if we're over the stack
* limit.
*/
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
vm_map_unlock_read(map);
is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr &&
addr < (vm_offset_t)p->p_sysent->sv_usrstack;
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim))
return (KERN_NO_SPACE);
}
#ifdef RACCT
if (racct_enable) {
PROC_LOCK(p);
if (is_procstack && racct_set(p, RACCT_STACK,
ctob(vm->vm_ssize) + grow_amount)) {
PROC_UNLOCK(p);
vm_map_unlock_read(map);
return (KERN_NO_SPACE);
}
PROC_UNLOCK(p);
}
#endif
/* Round up the grow amount modulo sgrowsiz */
growsize = sgrowsiz;
grow_amount = roundup(grow_amount, growsize);
if (grow_amount > stack_entry->avail_ssize)
grow_amount = stack_entry->avail_ssize;
grow_amount = roundup(grow_amount, sgrowsiz);
if (grow_amount > max_grow)
grow_amount = max_grow;
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > stacklim)) {
grow_amount = trunc_page((vm_size_t)stacklim) -
ctob(vm->vm_ssize);
}
#ifdef notyet
PROC_LOCK(p);
limit = racct_get_available(p, RACCT_STACK);
@@ -3766,9 +3751,9 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount > limit))
grow_amount = limit - ctob(vm->vm_ssize);
#endif
if (!old_mlock && map->flags & MAP_WIREFUTURE) {
if (!old_mlock && (map->flags & MAP_WIREFUTURE) != 0) {
if (ptoa(pmap_wired_count(map->pmap)) + grow_amount > lmemlim) {
vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3778,7 +3763,6 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
if (racct_set(p, RACCT_MEMLOCK,
ptoa(pmap_wired_count(map->pmap)) + grow_amount)) {
PROC_UNLOCK(p);
vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3786,9 +3770,9 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
}
#endif
}
/* If we would blow our VMEM resource limit, no go */
if (map->size + grow_amount > vmemlim) {
vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3797,7 +3781,6 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
PROC_LOCK(p);
if (racct_set(p, RACCT_VMEM, map->size + grow_amount)) {
PROC_UNLOCK(p);
vm_map_unlock_read(map);
rv = KERN_NO_SPACE;
goto out;
}
@@ -3805,62 +3788,42 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
}
#endif
if (vm_map_lock_upgrade(map))
goto Retry;
if (vm_map_lock_upgrade(map)) {
gap_entry = NULL;
vm_map_lock_read(map);
goto retry;
}
if (stack_entry == next_entry) {
/*
* Growing downward.
*/
/* Get the preliminary new entry start value */
addr = stack_entry->start - grow_amount;
/*
* If this puts us into the previous entry, cut back our
* growth to the available space. Also, see the note above.
*/
if (addr < end) {
stack_entry->avail_ssize = max_grow;
addr = end;
if (stack_guard_page)
addr += PAGE_SIZE;
if (grow_down) {
grow_start = gap_entry->end - grow_amount;
if (gap_entry->start + grow_amount == gap_entry->end) {
gap_start = gap_entry->start;
gap_end = gap_entry->end;
vm_map_entry_delete(map, gap_entry);
gap_deleted = true;
} else {
MPASS(gap_entry->start < gap_entry->end - grow_amount);
gap_entry->end -= grow_amount;
vm_map_entry_resize_free(map, gap_entry);
gap_deleted = false;
}
rv = vm_map_insert(map, NULL, 0, addr, stack_entry->start,
next_entry->protection, next_entry->max_protection,
rv = vm_map_insert(map, NULL, 0, grow_start,
grow_start + grow_amount,
stack_entry->protection, stack_entry->max_protection,
MAP_STACK_GROWS_DOWN);
/* Adjust the available stack space by the amount we grew. */
if (rv == KERN_SUCCESS) {
new_entry = prev_entry->next;
KASSERT(new_entry == stack_entry->prev, ("foo"));
KASSERT(new_entry->end == stack_entry->start, ("foo"));
KASSERT(new_entry->start == addr, ("foo"));
KASSERT((new_entry->eflags & MAP_ENTRY_GROWS_DOWN) !=
0, ("new entry lacks MAP_ENTRY_GROWS_DOWN"));
grow_amount = new_entry->end - new_entry->start;
new_entry->avail_ssize = stack_entry->avail_ssize -
grow_amount;
stack_entry->eflags &= ~MAP_ENTRY_GROWS_DOWN;
if (rv != KERN_SUCCESS) {
if (gap_deleted) {
rv1 = vm_map_insert(map, NULL, 0, gap_start,
gap_end, VM_PROT_NONE, VM_PROT_NONE,
MAP_CREATE_GUARD | MAP_CREATE_STACK_GAP_DN);
MPASS(rv1 == KERN_SUCCESS);
} else {
gap_entry->end += grow_amount;
vm_map_entry_resize_free(map, gap_entry);
}
}
} else {
/*
* Growing upward.
*/
addr = stack_entry->end + grow_amount;
/*
* If this puts us into the next entry, cut back our growth
* to the available space. Also, see the note above.
*/
if (addr > end) {
stack_entry->avail_ssize = end - stack_entry->end;
addr = end;
if (stack_guard_page)
addr -= PAGE_SIZE;
}
grow_amount = addr - stack_entry->end;
grow_start = stack_entry->end;
cred = stack_entry->cred;
if (cred == NULL && stack_entry->object.vm_object != NULL)
cred = stack_entry->object.vm_object->cred;
@@ -3872,30 +3835,30 @@ vm_map_growstack(struct proc *p, vm_offset_t addr)
stack_entry->offset,
(vm_size_t)(stack_entry->end - stack_entry->start),
(vm_size_t)grow_amount, cred != NULL)) {
map->size += (addr - stack_entry->end);
/* Update the current entry. */
stack_entry->end = addr;
stack_entry->avail_ssize -= grow_amount;
if (gap_entry->start + grow_amount == gap_entry->end)
vm_map_entry_delete(map, gap_entry);
else
gap_entry->start += grow_amount;
stack_entry->end += grow_amount;
map->size += grow_amount;
vm_map_entry_resize_free(map, stack_entry);
rv = KERN_SUCCESS;
} else
rv = KERN_FAILURE;
}
if (rv == KERN_SUCCESS && is_procstack)
vm->vm_ssize += btoc(grow_amount);
vm_map_unlock(map);
/*
* Heed the MAP_WIREFUTURE flag if it was set for this process.
*/
if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) {
vm_map_wire(map,
(stack_entry == next_entry) ? addr : addr - grow_amount,
(stack_entry == next_entry) ? stack_entry->start : addr,
if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE) != 0) {
vm_map_unlock(map);
vm_map_wire(map, grow_start, grow_start + grow_amount,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
}
vm_map_lock_read(map);
} else
vm_map_lock_downgrade(map);
out:
#ifdef RACCT
@@ -4019,10 +3982,11 @@ vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
vm_size_t size;
struct ucred *cred;
RetryLookup:;
RetryLookup:
vm_map_lock_read(map);
RetryLookupLocked:
/*
* Lookup the faulting address.
*/
@@ -4048,7 +4012,16 @@ RetryLookup:;
* Check whether this task is allowed to have this page.
*/
prot = entry->protection;
fault_type &= (VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE);
if ((fault_typea & VM_PROT_FAULT_LOOKUP) != 0) {
fault_typea &= ~VM_PROT_FAULT_LOOKUP;
if (prot == VM_PROT_NONE && map != kernel_map &&
(entry->eflags & MAP_ENTRY_GUARD) != 0 &&
(entry->eflags & (MAP_ENTRY_STACK_GAP_DN |
MAP_ENTRY_STACK_GAP_UP)) != 0 &&
vm_map_growstack(map, vaddr, entry) == KERN_SUCCESS)
goto RetryLookupLocked;
}
fault_type &= VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
if ((fault_type & prot) != fault_type || prot == VM_PROT_NONE) {
vm_map_unlock_read(map);
return (KERN_PROTECTION_FAILURE);
@@ -4282,8 +4255,9 @@ vm_map_print(vm_map_t map)
db_indent += 2;
for (entry = map->header.next; entry != &map->header;
entry = entry->next) {
db_iprintf("map entry %p: start=%p, end=%p\n",
(void *)entry, (void *)entry->start, (void *)entry->end);
db_iprintf("map entry %p: start=%p, end=%p, eflags=%#x, \n",
(void *)entry, (void *)entry->start, (void *)entry->end,
entry->eflags);
{
static char *inheritance_name[4] =
{"share", "copy", "none", "donate_copy"};

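As a usage note on the security.bsd.stack_guard_page knob above (now
enabled by default), a minimal sketch of querying it from userland; the
program is illustrative, not part of the commit:

#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int guard;
	size_t len = sizeof(guard);

	if (sysctlbyname("security.bsd.stack_guard_page", &guard, &len,
	    NULL, 0) == -1)
		err(1, "sysctlbyname");
	printf("stack guard pages: %d\n", guard);
	return (0);
}
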
sys/vm/vm_map.h

@@ -103,7 +103,6 @@ struct vm_map_entry {
struct vm_map_entry *right; /* right child in binary search tree */
vm_offset_t start; /* start address */
vm_offset_t end; /* end address */
vm_offset_t avail_ssize; /* amt can grow if this is a stack */
vm_offset_t next_read; /* vaddr of the next sequential read */
vm_size_t adj_free; /* amount of adjacent free space */
vm_size_t max_free; /* max free space in subtree */
@@ -142,6 +141,9 @@ struct vm_map_entry {
#define MAP_ENTRY_WIRE_SKIPPED 0x4000
#define MAP_ENTRY_VN_WRITECNT 0x8000 /* writeable vnode mapping */
#define MAP_ENTRY_GUARD 0x10000
#define MAP_ENTRY_STACK_GAP_DN 0x20000
#define MAP_ENTRY_STACK_GAP_UP 0x40000
#ifdef _KERNEL
static __inline u_char
@@ -315,6 +317,7 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_PREFAULT_PARTIAL 0x0010
#define MAP_DISABLE_SYNCER 0x0020
#define MAP_CHECK_EXCL 0x0040
#define MAP_CREATE_GUARD 0x0080
#define MAP_DISABLE_COREDUMP 0x0100
#define MAP_PREFAULT_MADVISE 0x0200 /* from (user) madvise request */
#define MAP_VN_WRITECOUNT 0x0400
@@ -322,6 +325,8 @@ long vmspace_resident_count(struct vmspace *vmspace);
#define MAP_STACK_GROWS_UP 0x2000
#define MAP_ACC_CHARGED 0x4000
#define MAP_ACC_NO_CHARGE 0x8000
#define MAP_CREATE_STACK_GAP_UP 0x10000
#define MAP_CREATE_STACK_GAP_DN 0x20000
/*
* vm_fault option flags
@@ -387,7 +392,6 @@ int vm_map_submap (vm_map_t, vm_offset_t, vm_offset_t, vm_map_t);
int vm_map_sync(vm_map_t, vm_offset_t, vm_offset_t, boolean_t, boolean_t);
int vm_map_madvise (vm_map_t, vm_offset_t, vm_offset_t, int);
int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int);
int vm_map_growstack (struct proc *p, vm_offset_t addr);
int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end,
int flags);
int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end,

sys/vm/vm_mmap.c

@@ -226,7 +226,7 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
}
if ((flags & ~(MAP_SHARED | MAP_PRIVATE | MAP_FIXED | MAP_HASSEMAPHORE |
MAP_STACK | MAP_NOSYNC | MAP_ANON | MAP_EXCL | MAP_NOCORE |
MAP_PREFAULT_READ |
MAP_PREFAULT_READ | MAP_GUARD |
#ifdef MAP_32BIT
MAP_32BIT |
#endif
@@ -239,6 +239,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
if (prot != PROT_NONE &&
(prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0)
return (EINVAL);
if ((flags & MAP_GUARD) != 0 && (prot != PROT_NONE || fd != -1 ||
pos != 0 || (flags & (MAP_SHARED | MAP_PRIVATE | MAP_PREFAULT |
MAP_PREFAULT_READ | MAP_ANON | MAP_STACK)) != 0))
return (EINVAL);
/*
* Align the file position to a page boundary,
@@ -314,7 +318,10 @@ kern_mmap(struct thread *td, uintptr_t addr0, size_t size, int prot, int flags,
* returns an error earlier.
*/
error = 0;
} else if (flags & MAP_ANON) {
} else if ((flags & MAP_GUARD) != 0) {
error = vm_mmap_object(&vms->vm_map, &addr, size, VM_PROT_NONE,
VM_PROT_NONE, flags, NULL, pos, FALSE, td);
} else if ((flags & MAP_ANON) != 0) {
/*
* Mapping blank space is trivial.
*
@@ -1511,6 +1518,8 @@ vm_mmap_object(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
}
if ((flags & MAP_EXCL) != 0)
docow |= MAP_CHECK_EXCL;
if ((flags & MAP_GUARD) != 0)
docow |= MAP_CREATE_GUARD;
if (fitit) {
if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER)