diff --git a/lib/libc/alpha/sys/Makefile.inc b/lib/libc/alpha/sys/Makefile.inc index 40398133a009..d0b21ed8d2d9 100644 --- a/lib/libc/alpha/sys/Makefile.inc +++ b/lib/libc/alpha/sys/Makefile.inc @@ -5,7 +5,7 @@ MDASM+= Ovfork.S brk.S cerror.S exect.S fork.S pipe.S ptrace.S \ # Don't generate default code for these syscalls: NOASM= break.o exit.o ftruncate.o getdomainname.o getlogin.o \ - lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \ + lseek.o mmap.o openbsd_poll.o pread.o \ pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o diff --git a/lib/libc/amd64/sys/Makefile.inc b/lib/libc/amd64/sys/Makefile.inc index adfa84855a20..897ed4f7fc62 100644 --- a/lib/libc/amd64/sys/Makefile.inc +++ b/lib/libc/amd64/sys/Makefile.inc @@ -6,7 +6,7 @@ MDASM= vfork.S brk.S cerror.S exect.S pipe.S ptrace.S reboot.S sbrk.S \ # Don't generate default code for these syscalls: NOASM= break.o exit.o ftruncate.o getdomainname.o getlogin.o \ - lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \ + lseek.o mmap.o openbsd_poll.o pread.o \ pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o diff --git a/lib/libc/i386/sys/Makefile.inc b/lib/libc/i386/sys/Makefile.inc index d928ea9045cc..ad7660987931 100644 --- a/lib/libc/i386/sys/Makefile.inc +++ b/lib/libc/i386/sys/Makefile.inc @@ -9,7 +9,7 @@ MDASM= Ovfork.S brk.S cerror.S exect.S pipe.S ptrace.S reboot.S sbrk.S \ # Don't generate default code for these syscalls: NOASM= break.o exit.o ftruncate.o getdomainname.o getlogin.o \ - lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \ + lseek.o mmap.o openbsd_poll.o pread.o \ pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o diff --git a/lib/libc/ia64/sys/Makefile.inc b/lib/libc/ia64/sys/Makefile.inc index 63067631ca25..dacd2f904562 100644 --- a/lib/libc/ia64/sys/Makefile.inc +++ b/lib/libc/ia64/sys/Makefile.inc @@ -5,7 +5,7 @@ 
MDASM+= Ovfork.S brk.S cerror.S exect.S fork.S getcontext.S pipe.S ptrace.S \ # Don't generate default code for these syscalls: NOASM= break.o exit.o ftruncate.o getdomainname.o getlogin.o \ - lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \ + lseek.o mmap.o openbsd_poll.o pread.o \ pwrite.o setdomainname.o sstk.o truncate.o uname.o vfork.o yield.o PSEUDO= _getlogin.o _exit.o diff --git a/lib/libc/powerpc/sys/Makefile.inc b/lib/libc/powerpc/sys/Makefile.inc index 3718c1a53274..84579ae5e038 100644 --- a/lib/libc/powerpc/sys/Makefile.inc +++ b/lib/libc/powerpc/sys/Makefile.inc @@ -4,7 +4,7 @@ MDASM+= brk.S cerror.S exect.S pipe.S ptrace.S sbrk.S setlogin.S # Don't generate default code for these syscalls: NOASM= break.o exit.o ftruncate.o getdomainname.o getlogin.o \ - lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \ + lseek.o mmap.o openbsd_poll.o pread.o \ pwrite.o setdomainname.o sstk.o truncate.o uname.o yield.o PSEUDO= _getlogin.o _exit.o diff --git a/lib/libc/sparc64/sys/Makefile.inc b/lib/libc/sparc64/sys/Makefile.inc index 67d9acdbe6fc..f677df19d1ba 100644 --- a/lib/libc/sparc64/sys/Makefile.inc +++ b/lib/libc/sparc64/sys/Makefile.inc @@ -16,7 +16,7 @@ MDASM+= brk.S cerror.S exect.S pipe.S ptrace.S sbrk.S setlogin.S sigaction.S # Don't generate default code for these syscalls: NOASM= break.o exit.o ftruncate.o getdomainname.o getlogin.o \ - lseek.o mlockall.o mmap.o munlockall.o openbsd_poll.o pread.o \ + lseek.o mmap.o openbsd_poll.o pread.o \ pwrite.o setdomainname.o sstk.o truncate.o uname.o yield.o PSEUDO= _getlogin.o _exit.o diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index ae536d5bb6a8..d66bd7fffaad 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -69,8 +69,8 @@ MAN+= _exit.2 accept.2 access.2 acct.2 adjtime.2 \ kldfind.2 kldfirstmod.2 kldload.2 kldnext.2 kldstat.2 kldsym.2 \ kldunload.2 kqueue.2 kse.2 ktrace.2 link.2 lio_listio.2 listen.2 \ lseek.2 \ - madvise.2 mincore.2 
minherit.2 mkdir.2 mkfifo.2 mknod.2 mlock.2 mmap.2 \ - modfind.2 modnext.2 modstat.2 mount.2 \ + madvise.2 mincore.2 minherit.2 mkdir.2 mkfifo.2 mknod.2 mlock.2 \ + mlockall.2 mmap.2 modfind.2 modnext.2 modstat.2 mount.2 \ mprotect.2 msync.2 munmap.2 nanosleep.2 ntp_adjtime.2 ntp_gettime.2 \ nfssvc.2 open.2 pathconf.2 pipe.2 poll.2 profil.2 ptrace.2 quotactl.2 \ read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \ @@ -121,6 +121,7 @@ MLINKS+=kse.2 kse_create.2 kse.2 kse_exit.2 kse.2 kse_release.2 \ kse.2 kse_wakeup.2 kse.2 kse_thr_interrupt.2 MLINKS+=madvise.2 posix_madvise.2 MLINKS+=mlock.2 munlock.2 +MLINKS+=mlockall.2 munlockall.2 MLINKS+=modnext.2 modfnext.2 MLINKS+=mount.2 unmount.2 MLINKS+=pathconf.2 fpathconf.2 diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c index cf7117a2452d..f05cab162b1f 100644 --- a/sys/kern/link_elf.c +++ b/sys/kern/link_elf.c @@ -745,7 +745,7 @@ link_elf_load_file(linker_class_t cls, const char* filename, vm_map_wire(kernel_map, (vm_offset_t) segbase, (vm_offset_t) segbase + segs[i]->p_memsz, - FALSE); + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); #endif } diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c index cf7117a2452d..f05cab162b1f 100644 --- a/sys/kern/link_elf_obj.c +++ b/sys/kern/link_elf_obj.c @@ -745,7 +745,7 @@ link_elf_load_file(linker_class_t cls, const char* filename, vm_map_wire(kernel_map, (vm_offset_t) segbase, (vm_offset_t) segbase + segs[i]->p_memsz, - FALSE); + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); #endif } diff --git a/sys/sys/mman.h b/sys/sys/mman.h index ec058d8b2619..09162ecd4b70 100644 --- a/sys/sys/mman.h +++ b/sys/sys/mman.h @@ -188,6 +188,8 @@ int munmap(void *, size_t); int posix_madvise(void *, size_t, int); #endif #if __POSIX_VISIBLE >= 199309 +int mlockall(int); +int munlockall(void); int shm_open(const char *, int, mode_t); int shm_unlink(const char *); #endif diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c index a4dca9beef39..9e6f57d7a080 100644 --- 
a/sys/vm/vm_contig.c +++ b/sys/vm/vm_contig.c @@ -266,7 +266,8 @@ again1: tmp_addr += PAGE_SIZE; } VM_OBJECT_UNLOCK(kernel_object); - vm_map_wire(map, addr, addr + size, FALSE); + vm_map_wire(map, addr, addr + size, + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); splx(s); return ((void *)addr); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index d0f6bf233cad..5d0744ba5d43 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -189,7 +189,8 @@ vslock(addr, len) { vm_map_wire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), - round_page((vm_offset_t)addr + len), FALSE); + round_page((vm_offset_t)addr + len), + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); } /* @@ -203,7 +204,8 @@ vsunlock(addr, len) vm_map_unwire(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr), - round_page((vm_offset_t)addr + len), FALSE); + round_page((vm_offset_t)addr + len), + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); } /* diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index ce4a450ee31f..f56075de7443 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -216,7 +216,8 @@ kmem_alloc(map, size) /* * And finally, mark the data as non-pageable. */ - (void) vm_map_wire(map, addr, addr + size, FALSE); + (void) vm_map_wire(map, addr, addr + size, + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES); return (addr); } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index eddfc88b97c8..d4910cde6b03 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1604,19 +1604,24 @@ vm_map_inherit(vm_map_t map, vm_offset_t start, vm_offset_t end, */ int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t user_unwire) + int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t saved_start; unsigned int last_timestamp; int rv; - boolean_t need_wakeup, result; + boolean_t need_wakeup, result, user_unwire; + user_unwire = (flags & VM_MAP_WIRE_USER) ? 
TRUE : FALSE; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { - vm_map_unlock(map); - return (KERN_INVALID_ADDRESS); + if (flags & VM_MAP_WIRE_HOLESOK) + first_entry = map->header.next; + else { + vm_map_unlock(map); + return (KERN_INVALID_ADDRESS); + } } last_timestamp = map->timestamp; entry = first_entry; @@ -1672,9 +1677,11 @@ vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, entry->eflags |= MAP_ENTRY_IN_TRANSITION; /* * Check the map for holes in the specified region. + * If VM_MAP_WIRE_HOLESOK was specified, skip this check. */ - if (entry->end < end && (entry->next == &map->header || - entry->next->start > entry->end)) { + if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && + (entry->end < end && (entry->next == &map->header || + entry->next->start > entry->end))) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; @@ -1733,19 +1740,24 @@ done: */ int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t user_wire) + int flags) { vm_map_entry_t entry, first_entry, tmp_entry; vm_offset_t saved_end, saved_start; unsigned int last_timestamp; int rv; - boolean_t need_wakeup, result; + boolean_t need_wakeup, result, user_wire; + user_wire = (flags & VM_MAP_WIRE_USER) ? TRUE : FALSE; vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); if (!vm_map_lookup_entry(map, start, &first_entry)) { - vm_map_unlock(map); - return (KERN_INVALID_ADDRESS); + if (flags & VM_MAP_WIRE_HOLESOK) + first_entry = map->header.next; + else { + vm_map_unlock(map); + return (KERN_INVALID_ADDRESS); + } } last_timestamp = map->timestamp; entry = first_entry; @@ -1856,9 +1868,11 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, } /* * Check the map for holes in the specified region. + * If VM_MAP_WIRE_HOLESOK was specified, skip this check. 
*/ - if (entry->end < end && (entry->next == &map->header || - entry->next->start > entry->end)) { + if (((flags & VM_MAP_WIRE_HOLESOK) == 0) && + (entry->end < end && (entry->next == &map->header || + entry->next->start > entry->end))) { end = entry->end; rv = KERN_INVALID_ADDRESS; goto done; @@ -2394,6 +2408,10 @@ vmspace_fork(struct vmspace *vm1) new_map = &vm2->vm_map; /* XXX */ new_map->timestamp = 1; + /* Do not inherit the MAP_WIREFUTURE property. */ + if ((new_map->flags & MAP_WIREFUTURE) == MAP_WIREFUTURE) + new_map->flags &= ~MAP_WIREFUTURE; + old_entry = old_map->header.next; while (old_entry != &old_map->header) { @@ -2704,6 +2722,15 @@ Retry: } vm_map_unlock(map); + /* + * Heed the MAP_WIREFUTURE flag if it was set for this process. + */ + if (rv == KERN_SUCCESS && (map->flags & MAP_WIREFUTURE)) + vm_map_wire(map, addr, stack_entry->start, + (p->p_flag & P_SYSTEM ? + VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES : + VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES)); + return (rv); } diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 62d012410c45..21c27e68560e 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -81,6 +81,7 @@ * vm_map_entry_t an entry in an address map. */ +typedef u_char vm_flags_t; typedef u_int vm_eflags_t; /* @@ -171,6 +172,7 @@ struct vm_map { u_char needs_wakeup; u_char system_map; /* Am I a system map? */ u_char infork; /* Am I in fork processing? 
*/ + vm_flags_t flags; /* flags for this vm_map */ vm_map_entry_t root; /* Root of a binary search tree */ unsigned int timestamp; /* Version number */ vm_map_entry_t first_free; /* First free space hint */ @@ -179,6 +181,11 @@ struct vm_map { #define max_offset header.end /* (c) */ }; +/* + * vm_flags_t values + */ +#define MAP_WIREFUTURE 0x01 /* wire all future pages */ + #ifdef _KERNEL static __inline vm_offset_t vm_map_max(vm_map_t map) @@ -197,6 +204,12 @@ vm_map_pmap(vm_map_t map) { return (map->pmap); } + +static __inline void +vm_map_modflags(vm_map_t map, vm_flags_t set, vm_flags_t clear) +{ + map->flags = (map->flags | set) & ~clear; +} #endif /* _KERNEL */ /* @@ -296,6 +309,15 @@ long vmspace_resident_count(struct vmspace *vmspace); #define VM_FAULT_WIRE_MASK (VM_FAULT_CHANGE_WIRING|VM_FAULT_USER_WIRE) #define VM_FAULT_DIRTY 8 /* Dirty the page */ +/* + * vm_map_wire and vm_map_unwire option flags + */ +#define VM_MAP_WIRE_SYSTEM 0 /* wiring in a kernel map */ +#define VM_MAP_WIRE_USER 1 /* wiring in a user map */ + +#define VM_MAP_WIRE_NOHOLES 0 /* region must not have holes */ +#define VM_MAP_WIRE_HOLESOK 2 /* region may have holes */ + #ifdef _KERNEL boolean_t vm_map_check_protection (vm_map_t, vm_offset_t, vm_offset_t, vm_prot_t); vm_map_t vm_map_create(pmap_t, vm_offset_t, vm_offset_t); @@ -322,9 +344,9 @@ void vm_init2 (void); int vm_map_stack (vm_map_t, vm_offset_t, vm_size_t, vm_prot_t, vm_prot_t, int); int vm_map_growstack (struct proc *p, vm_offset_t addr); int vm_map_unwire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t user_unwire); + int flags); int vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, - boolean_t user_wire); + int flags); int vmspace_swap_count (struct vmspace *vmspace); #endif /* _KERNEL */ #endif /* _VM_MAP_ */ diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 15a6c77a7238..b8188e4b0d50 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1046,7 +1046,7 @@ mlock(td, uap) #endif error = 
vm_map_wire(&td->td_proc->p_vmspace->vm_map, addr, - addr + size, TRUE); + addr + size, VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); return (error == KERN_SUCCESS ? 0 : ENOMEM); } @@ -1064,14 +1064,54 @@ mlockall(td, uap) struct thread *td; struct mlockall_args *uap; { - /* mtx_lock(&Giant); */ - /* mtx_unlock(&Giant); */ - return 0; + vm_map_t map; + int error; + + map = &td->td_proc->p_vmspace->vm_map; + error = 0; + + if ((uap->how == 0) || ((uap->how & ~(MCL_CURRENT|MCL_FUTURE)) != 0)) + return (EINVAL); + +#ifdef pmap_wired_count + /* + * If wiring all pages in the process would cause it to exceed + * a hard resource limit, return ENOMEM. + */ + if (map->size - ptoa(pmap_wired_count(vm_map_pmap(map))) > + td->td_proc->p_rlimit[RLIMIT_MEMLOCK].rlim_cur) + return (ENOMEM); +#else + error = suser(td); + if (error) + return (error); +#endif + + if (uap->how & MCL_FUTURE) { + vm_map_lock(map); + vm_map_modflags(map, MAP_WIREFUTURE, 0); + vm_map_unlock(map); + error = 0; + } + + if (uap->how & MCL_CURRENT) { + /* + * P1003.1-2001 mandates that all currently mapped pages + * will be memory resident and locked (wired) upon return + * from mlockall(). vm_map_wire() will wire pages, by + * calling vm_fault_wire() for each page in the region. + */ + error = vm_map_wire(map, vm_map_min(map), vm_map_max(map), + VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); + error = (error == KERN_SUCCESS ? 0 : EAGAIN); + } + + return (error); } #ifndef _SYS_SYSPROTO_H_ struct munlockall_args { - int how; + register_t dummy; }; #endif @@ -1083,9 +1123,26 @@ munlockall(td, uap) struct thread *td; struct munlockall_args *uap; { - /* mtx_lock(&Giant); */ - /* mtx_unlock(&Giant); */ - return 0; + vm_map_t map; + int error; + + map = &td->td_proc->p_vmspace->vm_map; +#ifndef pmap_wired_count + error = suser(td); + if (error) + return (error); +#endif + + /* Clear the MAP_WIREFUTURE flag from this vm_map. 
*/ + vm_map_lock(map); + vm_map_modflags(map, 0, MAP_WIREFUTURE); + vm_map_unlock(map); + + /* Forcibly unwire all pages. */ + error = vm_map_unwire(map, vm_map_min(map), vm_map_max(map), + VM_MAP_WIRE_USER|VM_MAP_WIRE_HOLESOK); + + return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -1125,7 +1182,7 @@ munlock(td, uap) #endif error = vm_map_unwire(&td->td_proc->p_vmspace->vm_map, addr, - addr + size, TRUE); + addr + size, VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); return (error == KERN_SUCCESS ? 0 : ENOMEM); } @@ -1282,6 +1339,15 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, if (rv != KERN_SUCCESS) (void) vm_map_remove(map, *addr, *addr + size); } + + /* + * If the process has requested that all future mappings + * be wired, then heed this. + */ + if ((rv == KERN_SUCCESS) && (map->flags & MAP_WIREFUTURE)) + vm_map_wire(map, *addr, *addr + size, + VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); + switch (rv) { case KERN_SUCCESS: return (0); diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c index b57958e05dd8..877609f1eb77 100644 --- a/sys/vm/vm_unix.c +++ b/sys/vm/vm_unix.c @@ -79,7 +79,9 @@ obreak(td, uap) vm_offset_t new, old, base; int rv; int error = 0; + boolean_t do_map_wirefuture; + do_map_wirefuture = FALSE; new = round_page((vm_offset_t)uap->nsize); vm_map_lock(&vm->vm_map); @@ -121,6 +123,20 @@ obreak(td, uap) goto done; } vm->vm_dsize += btoc(new - old); + /* + * Handle the MAP_WIREFUTURE case for legacy applications, + * by marking the newly mapped range of pages as wired. + * We are not required to perform a corresponding + * vm_map_unwire() before vm_map_delete() below, as + * it will forcibly unwire the pages in the range. + * + * XXX If the pages cannot be wired, no error is returned. 
+ */ + if ((vm->vm_map.flags & MAP_WIREFUTURE) == MAP_WIREFUTURE) { + if (bootverbose) + printf("obreak: MAP_WIREFUTURE set\n"); + do_map_wirefuture = TRUE; + } } else if (new < old) { rv = vm_map_delete(&vm->vm_map, new, old); if (rv != KERN_SUCCESS) { @@ -131,6 +147,11 @@ obreak(td, uap) } done: vm_map_unlock(&vm->vm_map); + + if (do_map_wirefuture) + (void) vm_map_wire(&vm->vm_map, old, new, + VM_MAP_WIRE_USER|VM_MAP_WIRE_NOHOLES); + return (error); }