diff --git a/lib/libc/sys/mmap.2 b/lib/libc/sys/mmap.2 index 73ffb2ead67b..130f70bcaa88 100644 --- a/lib/libc/sys/mmap.2 +++ b/lib/libc/sys/mmap.2 @@ -28,7 +28,7 @@ .\" @(#)mmap.2 8.4 (Berkeley) 5/11/95 .\" $FreeBSD$ .\" -.Dd March 18, 2012 +.Dd August 16, 2013 .Dt MMAP 2 .Os .Sh NAME @@ -97,7 +97,30 @@ Sharing, mapping type and options are specified in the argument by .Em or Ns 'ing the following values: -.Bl -tag -width MAP_HASSEMAPHORE +.Bl -tag -width MAP_PREFAULT_READ +.It Dv MAP_ALIGNED Ns Pq Fa n +Align the region on a requested boundary. +If a suitable region cannot be found, +.Fn mmap +will fail. +The +.Fa n +argument specifies the binary logarithm of the desired alignment. +.It Dv MAP_ALIGNED_SUPER +Align the region to maximize the potential use of large +.Pq Dq super +pages. +If a suitable region cannot be found, +.Fn mmap +will fail. +The system will choose a suitable page size based on the size of +the mapping. +The page size used as well as the alignment of the region may both be +affected by properties of the file being mapped. +In particular, +the physical address of existing pages of a file may require a specific +alignment. +The region is not guaranteed to be aligned on any specific boundary. .It Dv MAP_ANON Map anonymous memory not associated with any specific file. The file descriptor used for creating @@ -274,6 +297,25 @@ Although this implementation does not impose any alignment restrictions on the .Fa offset argument, a portable program must only use page-aligned values. +.Pp +Large page mappings require that the pages backing an object be +aligned in matching blocks in both the virtual address space and RAM. +The system will automatically attempt to use large page mappings when +mapping an object that is already backed by large pages in RAM by +aligning the mapping request in the virtual address space to match the +alignment of the large physical pages.
+The system may also use large page mappings when mapping portions of an +object that are not yet backed by pages in RAM. +The +.Dv MAP_ALIGNED_SUPER +flag is an optimization that will align the mapping request to the +size of a large page similar to +.Dv MAP_ALIGNED , +except that the system will override this alignment if an object already +uses large pages so that the mapping will be consistent with the existing +large pages. +This flag is mostly useful for maximizing the use of large pages on the +first mapping of objects that do not yet have pages present in RAM. .Sh RETURN VALUES Upon successful completion, .Fn mmap @@ -325,6 +367,10 @@ The argument was equal to zero. .It Bq Er EINVAL +.Dv MAP_ALIGNED +was specified and the desired alignment was either larger than the +virtual address size of the machine or smaller than a page. +.It Bq Er EINVAL .Dv MAP_ANON was specified and the .Fa fd @@ -356,7 +402,8 @@ was specified and insufficient memory was available. .Xr msync 2 , .Xr munlock 2 , .Xr munmap 2 , -.Xr getpagesize 3 +.Xr getpagesize 3 , +.Xr getpagesizes 3 .Sh BUGS The .Fa len diff --git a/sys/sys/mman.h b/sys/sys/mman.h index a178d7cb3d88..c5e47a0c1622 100644 --- a/sys/sys/mman.h +++ b/sys/sys/mman.h @@ -91,6 +91,17 @@ */ #define MAP_NOCORE 0x00020000 /* dont include these pages in a coredump */ #define MAP_PREFAULT_READ 0x00040000 /* prefault mapping for reading */ + +/* + * Request specific alignment (n == log2 of the desired alignment). + * + * MAP_ALIGNED_SUPER requests optimal superpage alignment, but does + * not enforce a specific alignment. 
+ */ +#define MAP_ALIGNED(n) ((n) << MAP_ALIGNMENT_SHIFT) +#define MAP_ALIGNMENT_SHIFT 24 +#define MAP_ALIGNMENT_MASK MAP_ALIGNED(0xff) +#define MAP_ALIGNED_SUPER MAP_ALIGNED(1) /* align on a superpage */ #endif /* __BSD_VISIBLE */ #if __POSIX_VISIBLE >= 199309 diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c index b539f9d0c462..7ab1ee041647 100644 --- a/sys/vm/vm_init.c +++ b/sys/vm/vm_init.c @@ -112,7 +112,7 @@ kva_import(void *unused, vmem_size_t size, int flags, vmem_addr_t *addrp) addr = vm_map_min(kernel_map); result = vm_map_find(kernel_map, NULL, 0, &addr, size, - VMFS_ALIGNED_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); + VMFS_SUPER_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT); if (result != KERN_SUCCESS) return (ENOMEM); diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index c7cb4095d5ab..9790653169bf 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -286,7 +286,7 @@ kmem_suballoc(vm_map_t parent, vm_offset_t *min, vm_offset_t *max, *min = vm_map_min(parent); ret = vm_map_find(parent, NULL, 0, min, size, superpage_align ? 
- VMFS_ALIGNED_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, + VMFS_SUPER_SPACE : VMFS_ANY_SPACE, VM_PROT_ALL, VM_PROT_ALL, MAP_ACC_NO_CHARGE); if (ret != KERN_SUCCESS) panic("kmem_suballoc: bad status return of %d", ret); diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 1d92965a10cd..1a6146ec1366 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1434,12 +1434,17 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_size_t length, int find_space, vm_prot_t prot, vm_prot_t max, int cow) { - vm_offset_t start, initial_addr; + vm_offset_t alignment, initial_addr, start; int result; if (find_space == VMFS_OPTIMAL_SPACE && (object == NULL || (object->flags & OBJ_COLORED) == 0)) - find_space = VMFS_ANY_SPACE; + find_space = VMFS_ANY_SPACE; + if (find_space >> 8 != 0) { + KASSERT((find_space & 0xff) == 0, ("bad VMFS flags")); + alignment = (vm_offset_t)1 << (find_space >> 8); + } else + alignment = 0; initial_addr = *addr; again: start = initial_addr; @@ -1455,12 +1460,18 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, return (KERN_NO_SPACE); } switch (find_space) { - case VMFS_ALIGNED_SPACE: + case VMFS_SUPER_SPACE: case VMFS_OPTIMAL_SPACE: pmap_align_superpage(object, offset, addr, length); break; + case VMFS_ANY_SPACE: + break; default: + if ((*addr & (alignment - 1)) != 0) { + *addr &= ~(alignment - 1); + *addr += alignment; + } break; } @@ -1468,8 +1479,8 @@ vm_map_find(vm_map_t map, vm_object_t object, vm_ooffset_t offset, } result = vm_map_insert(map, object, offset, start, start + length, prot, max, cow); - } while (result == KERN_NO_SPACE && (find_space == VMFS_ALIGNED_SPACE || - find_space == VMFS_OPTIMAL_SPACE)); + } while (result == KERN_NO_SPACE && find_space != VMFS_NO_SPACE && + find_space != VMFS_ANY_SPACE); vm_map_unlock(map); return (result); } diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index ed8864e7c6e6..054c50624845 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -339,12 +339,16 @@ long 
vmspace_resident_count(struct vmspace *vmspace); #define VM_FAULT_READ_AHEAD_MAX min(atop(MAXPHYS) - 1, UINT8_MAX) /* - * The following "find_space" options are supported by vm_map_find() + * The following "find_space" options are supported by vm_map_find(). + * + * For VMFS_ALIGNED_SPACE, the macro argument is the base-2 logarithm + * of the desired alignment. */ #define VMFS_NO_SPACE 0 /* don't find; use the given range */ #define VMFS_ANY_SPACE 1 /* find a range with any alignment */ #define VMFS_OPTIMAL_SPACE 2 /* find a range with optimal alignment*/ -#define VMFS_ALIGNED_SPACE 3 /* find a superpage-aligned range */ +#define VMFS_SUPER_SPACE 3 /* find a superpage-aligned range */ +#define VMFS_ALIGNED_SPACE(x) ((x) << 8) /* find a range with fixed alignment */ /* * vm_map_wire and vm_map_unwire option flags diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 1b0809756c20..53a7be51d7f7 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -201,7 +201,7 @@ sys_mmap(td, uap) vm_prot_t cap_maxprot, prot, maxprot; void *handle; objtype_t handle_type; - int flags, error; + int align, error, flags; off_t pos; struct vmspace *vms = td->td_proc->p_vmspace; cap_rights_t rights; @@ -251,6 +251,13 @@ sys_mmap(td, uap) size += pageoff; /* low end... */ size = (vm_size_t) round_page(size); /* hi end */ + /* Ensure alignment is at least a page and fits in a pointer. */ + align = flags & MAP_ALIGNMENT_MASK; + if (align != 0 && align != MAP_ALIGNED_SUPER && + (align >> MAP_ALIGNMENT_SHIFT >= sizeof(void *) * NBBY || + align >> MAP_ALIGNMENT_SHIFT < PAGE_SHIFT)) + return (EINVAL); + /* * Check for illegal addresses. Watch out for address wrap... Note * that VM_*_ADDRESS are not constants due to casts (argh).
@@ -1490,7 +1497,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, boolean_t fitit; vm_object_t object = NULL; struct thread *td = curthread; - int docow, error, rv; + int docow, error, findspace, rv; boolean_t writecounted; if (size == 0) @@ -1605,12 +1612,17 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot, if (flags & MAP_STACK) rv = vm_map_stack(map, *addr, size, prot, maxprot, docow | MAP_STACK_GROWS_DOWN); - else if (fitit) - rv = vm_map_find(map, object, foff, addr, size, - object != NULL && object->type == OBJT_DEVICE ? - VMFS_ALIGNED_SPACE : VMFS_OPTIMAL_SPACE, prot, maxprot, - docow); - else + else if (fitit) { + if ((flags & MAP_ALIGNMENT_MASK) == MAP_ALIGNED_SUPER) + findspace = VMFS_SUPER_SPACE; + else if ((flags & MAP_ALIGNMENT_MASK) != 0) + findspace = VMFS_ALIGNED_SPACE(flags >> + MAP_ALIGNMENT_SHIFT); + else + findspace = VMFS_OPTIMAL_SPACE; + rv = vm_map_find(map, object, foff, addr, size, findspace, + prot, maxprot, docow); + } else rv = vm_map_fixed(map, object, foff, *addr, size, prot, maxprot, docow); diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr index bb70d131a2cb..5c82f302a6f4 100644 --- a/usr.bin/kdump/mksubr +++ b/usr.bin/kdump/mksubr @@ -385,7 +385,6 @@ auto_switch_type "lio_listioname" "LIO_(NO)?WAIT[[:space:]]+[0-9]+" auto_switch_type "madvisebehavname" "_?MADV_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h" auto_switch_type "minheritname" "INHERIT_[A-Z]+[[:space:]]+[0-9]+" "sys/mman.h" auto_or_type "mlockallname" "MCL_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mman.h" -auto_or_type "mmapflagsname" "MAP_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h" auto_or_type "mmapprotname" "PROT_[A-Z]+[[:space:]]+0x[0-9A-Fa-f]+" "sys/mman.h" auto_or_type "modename" "S_[A-Z]+[[:space:]]+[0-6]{7}" "sys/stat.h" auto_or_type "mountflagsname" "MNT_[A-Z]+[[:space:]]+0x[0-9]+" "sys/mount.h" @@ -466,6 +465,40 @@ cat <<_EOF_ } } +/* + * AUTO - Special + * + * The MAP_ALIGNED flag requires special handling. 
+ */ +void +mmapflagsname(int flags) +{ + int align; + int or = 0; + printf("%#x<", flags); +_EOF_ +egrep "^#[[:space:]]*define[[:space:]]+MAP_[A-Z_]+[[:space:]]+0x[0-9A-Fa-f]+[[:space:]]*" \ + $include_dir/sys/mman.h | grep -v MAP_ALIGNED | \ + awk '{ for (i = 1; i <= NF; i++) \ + if ($i ~ /define/) \ + break; \ + ++i; \ + printf "\tif (!((flags > 0) ^ ((%s) > 0)))\n\t\tif_print_or(flags, %s, or);\n", $i, $i }' +cat <<_EOF_ + align = flags & MAP_ALIGNMENT_MASK; + if (align != 0) { + if (align == MAP_ALIGNED_SUPER) + print_or("MAP_ALIGNED_SUPER", or); + else { + print_or("MAP_ALIGNED", or); + printf("(%d)", align >> MAP_ALIGNMENT_SHIFT); + } + } + printf(">"); + if (or == 0) + printf("%d", flags); +} + /* * AUTO - Special * diff --git a/usr.bin/truss/syscalls.c b/usr.bin/truss/syscalls.c index 6668d7ce0836..af4ed5e960bc 100644 --- a/usr.bin/truss/syscalls.c +++ b/usr.bin/truss/syscalls.c @@ -296,7 +296,7 @@ static struct xlat mmap_flags[] = { X(MAP_SHARED) X(MAP_PRIVATE) X(MAP_FIXED) X(MAP_RENAME) X(MAP_NORESERVE) X(MAP_RESERVED0080) X(MAP_RESERVED0100) X(MAP_HASSEMAPHORE) X(MAP_STACK) X(MAP_NOSYNC) X(MAP_ANON) - X(MAP_NOCORE) XEND + X(MAP_NOCORE) X(MAP_PREFAULT_READ) XEND }; static struct xlat mprot_flags[] = { @@ -893,9 +893,41 @@ print_arg(struct syscall_args *sc, unsigned long *args, long retval, case Mprot: tmp = strdup(xlookup_bits(mprot_flags, args[sc->offset])); break; - case Mmapflags: - tmp = strdup(xlookup_bits(mmap_flags, args[sc->offset])); + case Mmapflags: { + char *base, *alignstr; + int align, flags; + + /* + * MAP_ALIGNED can't be handled by xlookup_bits(), so + * generate that string manually and prepend it to the + * string from xlookup_bits(). Have to be careful to + * avoid outputting MAP_ALIGNED|0 if MAP_ALIGNED is + * the only flag.
+ */ + flags = args[sc->offset] & ~MAP_ALIGNMENT_MASK; + align = args[sc->offset] & MAP_ALIGNMENT_MASK; + if (align != 0) { + if (align == MAP_ALIGNED_SUPER) + alignstr = strdup("MAP_ALIGNED_SUPER"); + else + asprintf(&alignstr, "MAP_ALIGNED(%d)", + align >> MAP_ALIGNMENT_SHIFT); + if (flags == 0) { + tmp = alignstr; + break; + } + } else + alignstr = NULL; + base = strdup(xlookup_bits(mmap_flags, flags)); + if (alignstr == NULL) { + tmp = base; + break; + } + asprintf(&tmp, "%s|%s", alignstr, base); + free(alignstr); + free(base); break; + } case Whence: tmp = strdup(xlookup(whence_arg, args[sc->offset])); break;