From 52c81be11a107cdedb865a274b5567b0c95c0308 Mon Sep 17 00:00:00 2001 From: Edward Tomasz Napierala Date: Sat, 20 Jun 2020 18:29:22 +0000 Subject: [PATCH] Add linux_madvise(2) instead of having Linux apps call the native FreeBSD madvise(2) directly. While some of the flag values match, most don't. PR: kern/230160 Reported by: markj Reviewed by: markj Discussed with: brooks, kib MFC after: 2 weeks Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D25272 --- sys/amd64/linux/linux_machdep.c | 7 ++++ sys/amd64/linux/syscalls.master | 2 +- sys/amd64/linux32/linux32_machdep.c | 7 ++++ sys/amd64/linux32/syscalls.master | 2 +- sys/arm64/linux/linux_machdep.c | 7 ++++ sys/arm64/linux/syscalls.master | 5 +-- sys/compat/linux/linux_mmap.c | 56 +++++++++++++++++++++++++++++ sys/compat/linux/linux_mmap.h | 21 +++++++++++ sys/i386/linux/linux_machdep.c | 7 ++++ sys/i386/linux/syscalls.master | 2 +- sys/sys/syscallsubr.h | 2 ++ sys/vm/vm_mmap.c | 14 ++++++-- 12 files changed, 124 insertions(+), 8 deletions(-) diff --git a/sys/amd64/linux/linux_machdep.c b/sys/amd64/linux/linux_machdep.c index 7b3512bfe41a..654fefa02009 100644 --- a/sys/amd64/linux/linux_machdep.c +++ b/sys/amd64/linux/linux_machdep.c @@ -140,6 +140,13 @@ linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } +int +linux_madvise(struct thread *td, struct linux_madvise_args *uap) +{ + + return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); +} + int linux_iopl(struct thread *td, struct linux_iopl_args *args) { diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index c3936a926da3..bfab9e7ba831 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -94,7 +94,7 @@ l_size_t len, l_int fl); } 27 AUE_MINCORE STD { int linux_mincore(l_ulong start, \ l_size_t len, u_char *vec); } -28 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \ +28 AUE_MADVISE STD { int linux_madvise(void *addr, size_t len, \ int behav); } 29 AUE_NULL STD { int linux_shmget(l_key_t key, l_size_t size, \ l_int shmflg); } diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index b62a300191cb..8213a2f606b5 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -468,6 +468,13 @@ linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } +int +linux_madvise(struct thread *td, struct linux_madvise_args *uap) +{ + + return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); +} + int linux_iopl(struct thread *td, struct linux_iopl_args *args) { diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index 96933bb970b3..e40a9973a539 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -390,7 +390,7 @@ char *put_old); } 218 AUE_MINCORE STD { int linux_mincore(l_ulong start, \ l_size_t len, u_char *vec); } -219 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \ +219 AUE_MADVISE STD { int linux_madvise(void *addr, size_t len, \ int behav); } 220 AUE_GETDIRENTRIES STD { int linux_getdents64(l_uint fd, \ void *dirent, l_uint count); } diff --git a/sys/arm64/linux/linux_machdep.c b/sys/arm64/linux/linux_machdep.c index ef6faf275452..058cbe965b16 100644 --- a/sys/arm64/linux/linux_machdep.c +++ b/sys/arm64/linux/linux_machdep.c @@ -104,6 +104,13 @@ linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) uap->prot)); } +int +linux_madvise(struct thread *td, struct linux_madvise_args *uap) +{ + + return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); +} + /* LINUXTODO: implement arm64 linux_rt_sigsuspend */ int linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master index 46e710321620..021c2c2a669c 100644 --- a/sys/arm64/linux/syscalls.master +++ b/sys/arm64/linux/syscalls.master @@ -1310,8 +1310,9 @@ u_char *vec ); } -233 AUE_MADVISE NOPROTO { - int madvise(void *addr, +233 AUE_MADVISE STD { + int linux_madvise( + void *addr, size_t len, int behav ); diff --git a/sys/compat/linux/linux_mmap.c b/sys/compat/linux/linux_mmap.c index 941a3388487c..31976084d0e5 100644 --- a/sys/compat/linux/linux_mmap.c +++ b/sys/compat/linux/linux_mmap.c @@ -242,6 +242,62 @@ linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot) return (kern_mprotect(td, addr, len, prot)); } +int +linux_madvise_common(struct thread *td, uintptr_t addr, size_t len, int behav) +{ + + switch (behav) { + case LINUX_MADV_NORMAL: + return (kern_madvise(td, addr, len, MADV_NORMAL)); + case LINUX_MADV_RANDOM: + return (kern_madvise(td, addr, len, MADV_RANDOM)); + case LINUX_MADV_SEQUENTIAL: + return (kern_madvise(td, addr, len, MADV_SEQUENTIAL)); + case LINUX_MADV_WILLNEED: + return (kern_madvise(td, addr, len, MADV_WILLNEED)); + case LINUX_MADV_DONTNEED: + return (kern_madvise(td, addr, len, MADV_DONTNEED)); + case LINUX_MADV_FREE: + return (kern_madvise(td, addr, len, MADV_FREE)); + case LINUX_MADV_REMOVE: + linux_msg(curthread, "unsupported madvise MADV_REMOVE"); + return (EINVAL); + case LINUX_MADV_DONTFORK: + return (kern_minherit(td, addr, len, INHERIT_NONE)); + case LINUX_MADV_DOFORK: + return (kern_minherit(td, addr, len, INHERIT_COPY)); + case LINUX_MADV_MERGEABLE: + linux_msg(curthread, "unsupported madvise MADV_MERGEABLE"); + return (EINVAL); + case LINUX_MADV_UNMERGEABLE: + /* We don't merge anyway. */ + return (0); + case LINUX_MADV_HUGEPAGE: + /* Ignored; on FreeBSD huge pages are always on. */ + return (0); + case LINUX_MADV_NOHUGEPAGE: + linux_msg(curthread, "unsupported madvise MADV_NOHUGEPAGE"); + return (EINVAL); + case LINUX_MADV_DONTDUMP: + return (kern_madvise(td, addr, len, MADV_NOCORE)); + case LINUX_MADV_DODUMP: + return (kern_madvise(td, addr, len, MADV_CORE)); + case LINUX_MADV_WIPEONFORK: + return (kern_minherit(td, addr, len, INHERIT_ZERO)); + case LINUX_MADV_KEEPONFORK: + return (kern_minherit(td, addr, len, INHERIT_COPY)); + case LINUX_MADV_HWPOISON: + linux_msg(curthread, "unsupported madvise MADV_HWPOISON"); + return (EINVAL); + case LINUX_MADV_SOFT_OFFLINE: + linux_msg(curthread, "unsupported madvise MADV_SOFT_OFFLINE"); + return (EINVAL); + default: + linux_msg(curthread, "unsupported madvise behav %d", behav); + return (EINVAL); + } +} + #if defined(__amd64__) static void linux_fixup_prot(struct thread *td, int *prot) diff --git a/sys/compat/linux/linux_mmap.h b/sys/compat/linux/linux_mmap.h index 8dc123526f21..3bedc2102f5f 100644 --- a/sys/compat/linux/linux_mmap.h +++ b/sys/compat/linux/linux_mmap.h @@ -45,8 +45,29 @@ #define LINUX_PROT_GROWSDOWN 0x01000000 #define LINUX_PROT_GROWSUP 0x02000000 +#define LINUX_MADV_NORMAL 0 +#define LINUX_MADV_RANDOM 1 +#define LINUX_MADV_SEQUENTIAL 2 +#define LINUX_MADV_WILLNEED 3 +#define LINUX_MADV_DONTNEED 4 +#define LINUX_MADV_FREE 8 +#define LINUX_MADV_REMOVE 9 +#define LINUX_MADV_DONTFORK 10 +#define LINUX_MADV_DOFORK 11 +#define LINUX_MADV_MERGEABLE 12 +#define LINUX_MADV_UNMERGEABLE 13 +#define LINUX_MADV_HUGEPAGE 14 +#define LINUX_MADV_NOHUGEPAGE 15 +#define LINUX_MADV_DONTDUMP 16 +#define LINUX_MADV_DODUMP 17 +#define LINUX_MADV_WIPEONFORK 18 +#define LINUX_MADV_KEEPONFORK 19 +#define LINUX_MADV_HWPOISON 100 +#define LINUX_MADV_SOFT_OFFLINE 101 + int linux_mmap_common(struct thread *, uintptr_t, size_t, int, int, int, off_t); int linux_mprotect_common(struct thread *, uintptr_t, size_t, int); +int linux_madvise_common(struct thread *, uintptr_t, size_t, int); #endif /* _LINUX_MMAP_H_ */ diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index 2b83e72d20e5..ba106d554ee4 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -353,6 +353,13 @@ linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, uap->prot)); } +int +linux_madvise(struct thread *td, struct linux_madvise_args *uap) +{ + + return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, uap->behav)); +} + int linux_ioperm(struct thread *td, struct linux_ioperm_args *args) { diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index 6e86c1b88d74..0ced058a8e1c 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -393,7 +393,7 @@ char *put_old); } 218 AUE_MINCORE STD { int linux_mincore(l_ulong start, \ l_size_t len, u_char *vec); } -219 AUE_MADVISE NOPROTO { int madvise(void *addr, size_t len, \ +219 AUE_MADVISE STD { int linux_madvise(void *addr, size_t len, \ int behav); } 220 AUE_GETDIRENTRIES STD { int linux_getdents64(l_uint fd, \ void *dirent, l_uint count); } diff --git a/sys/sys/syscallsubr.h b/sys/sys/syscallsubr.h index 1593ef2c4941..058ca0929ac0 100644 --- a/sys/sys/syscallsubr.h +++ b/sys/sys/syscallsubr.h @@ -185,6 +185,8 @@ int kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg); int kern_madvise(struct thread *td, uintptr_t addr, size_t len, int behav); int kern_mincore(struct thread *td, uintptr_t addr, size_t len, char *vec); +int kern_minherit(struct thread *td, uintptr_t addr, size_t len, + int inherit); int kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, int mode); int kern_mkfifoat(struct thread *td, int fd, const char *path, diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 78e6db9976f8..7add69a58f06 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -701,14 +701,22 @@ struct minherit_args { #endif int sys_minherit(struct thread *td, struct minherit_args *uap) +{ + + return (kern_minherit(td, (uintptr_t)uap->addr, uap->len, + uap->inherit)); +} + +int +kern_minherit(struct thread *td, uintptr_t addr0, size_t len, int inherit0) { vm_offset_t addr; vm_size_t size, pageoff; vm_inherit_t inherit; - addr = (vm_offset_t)uap->addr; - size = uap->len; - inherit = uap->inherit; + addr = (vm_offset_t)addr0; + size = len; + inherit = inherit0; pageoff = (addr & PAGE_MASK); addr -= pageoff;