From 17913b0b6b707568d63559255820f3212cd31cdf Mon Sep 17 00:00:00 2001 From: Dmitry Chagin Date: Thu, 12 Aug 2021 11:49:36 +0300 Subject: [PATCH] linux(4): Implement clone3 system call. clone3 system call is used by glibc-2.34. Differential revision: https://reviews.freebsd.org/D31475 MFC after: 2 weeks --- sys/amd64/linux/syscalls.master | 5 +- sys/amd64/linux32/syscalls.master | 5 +- sys/arm64/linux/syscalls.master | 5 +- sys/compat/linux/linux_fork.c | 80 +++++++++++++++++++++++++++++++ sys/compat/linux/linux_fork.h | 9 ++++ sys/compat/linux/linux_misc.h | 2 + sys/i386/linux/syscalls.master | 5 +- 7 files changed, 107 insertions(+), 4 deletions(-) diff --git a/sys/amd64/linux/syscalls.master b/sys/amd64/linux/syscalls.master index cdf663ce2e06..d3ebedbfed01 100644 --- a/sys/amd64/linux/syscalls.master +++ b/sys/amd64/linux/syscalls.master @@ -2082,7 +2082,10 @@ int linux_pidfd_open(void); } 435 AUE_NULL STD { - int linux_clone3(void); + int linux_clone3( + struct l_user_clone_args *uargs, + l_size_t usize + ); } 436 AUE_NULL STD { int linux_close_range(void); diff --git a/sys/amd64/linux32/syscalls.master b/sys/amd64/linux32/syscalls.master index ff7ab7f98ca8..9d55fb1ade48 100644 --- a/sys/amd64/linux32/syscalls.master +++ b/sys/amd64/linux32/syscalls.master @@ -2484,7 +2484,10 @@ int linux_pidfd_open(void); } 435 AUE_NULL STD { - int linux_clone3(void); + int linux_clone3( + struct l_user_clone_args *uargs, + l_size_t usize + ); } 436 AUE_NULL STD { int linux_close_range(void); diff --git a/sys/arm64/linux/syscalls.master b/sys/arm64/linux/syscalls.master index 6e163cc3360d..a6bb14a5ed63 100644 --- a/sys/arm64/linux/syscalls.master +++ b/sys/arm64/linux/syscalls.master @@ -1731,7 +1731,10 @@ int linux_pidfd_open(void); } 435 AUE_NULL STD { - int linux_clone3(void); + int linux_clone3( + struct l_user_clone_args *uargs, + l_size_t usize + ); } 436 AUE_NULL STD { int linux_close_range(void); diff --git a/sys/compat/linux/linux_fork.c 
b/sys/compat/linux/linux_fork.c index 97f5b7d89de4..db3e9e1ea27b 100644 --- a/sys/compat/linux/linux_fork.c +++ b/sys/compat/linux/linux_fork.c @@ -377,6 +377,86 @@ linux_clone(struct thread *td, struct linux_clone_args *args) return (linux_clone_proc(td, &ca)); } + +static int +linux_clone3_args_valid(struct l_user_clone_args *uca) +{ + + /* Reject unknown flags and the CLONE_DETACHED/CSIGNAL bits. */ + if ((uca->flags & ~(LINUX_CLONE_LEGACY_FLAGS | + LINUX_CLONE_CLEAR_SIGHAND | LINUX_CLONE_INTO_CGROUP)) != 0) + return (EINVAL); + if ((uca->flags & (LINUX_CLONE_DETACHED | LINUX_CSIGNAL)) != 0) + return (EINVAL); + + if ((uca->flags & (LINUX_CLONE_SIGHAND | LINUX_CLONE_CLEAR_SIGHAND)) == + (LINUX_CLONE_SIGHAND | LINUX_CLONE_CLEAR_SIGHAND)) + return (EINVAL); + if ((uca->flags & (LINUX_CLONE_THREAD | LINUX_CLONE_PARENT)) != 0 && + uca->exit_signal != 0) + return (EINVAL); + + /* set_tid is not supported; its arguments are only validated. */ + if (uca->set_tid_size > LINUX_MAX_PID_NS_LEVEL) + return (EINVAL); + if (uca->set_tid == 0 && uca->set_tid_size > 0) + return (EINVAL); + if (uca->set_tid != 0 && uca->set_tid_size == 0) + return (EINVAL); + + if (uca->stack == 0 && uca->stack_size > 0) + return (EINVAL); + if (uca->stack != 0 && uca->stack_size == 0) + return (EINVAL); + + return (0); +} + +int +linux_clone3(struct thread *td, struct linux_clone3_args *args) +{ + struct l_user_clone_args *uca; + struct l_clone_args *ca; + size_t size; + int error; + + if (args->usize > PAGE_SIZE) + return (E2BIG); + if (args->usize < LINUX_CLONE_ARGS_SIZE_VER0) + return (EINVAL); + + /* + * usize may be smaller than sizeof(*uca).  To avoid using + * uninitialized fields, allocate at least sizeof(*uca) bytes of + * zeroed storage, so members beyond usize read as zero. 
+ */ + size = max(args->usize, sizeof(*uca)); + uca = malloc(size, M_LINUX, M_WAITOK | M_ZERO); + error = copyin(args->uargs, uca, args->usize); + if (error != 0) + goto out; + error = linux_clone3_args_valid(uca); + if (error != 0) + goto out; + ca = malloc(sizeof(*ca), M_LINUX, M_WAITOK | M_ZERO); + ca->flags = uca->flags; + ca->child_tid = PTRIN(uca->child_tid); + ca->parent_tid = PTRIN(uca->parent_tid); + ca->exit_signal = uca->exit_signal; + ca->stack = uca->stack + uca->stack_size; /* one past the end of the supplied stack range */ + ca->stack_size = uca->stack_size; + ca->tls = uca->tls; + + if ((ca->flags & LINUX_CLONE_THREAD) != 0) + error = linux_clone_thread(td, ca); + else + error = linux_clone_proc(td, ca); + free(ca, M_LINUX); +out: + free(uca, M_LINUX); + return (error); +} + int linux_exit(struct thread *td, struct linux_exit_args *args) { diff --git a/sys/compat/linux/linux_fork.h b/sys/compat/linux/linux_fork.h index 04dfb8ac8a70..fa7b39544450 100644 --- a/sys/compat/linux/linux_fork.h +++ b/sys/compat/linux/linux_fork.h @@ -53,6 +53,13 @@ #define LINUX_CLONE_NEWNET 0x40000000 #define LINUX_CLONE_IO 0x80000000 +/* Flags for the clone3() syscall. 
*/ +#define LINUX_CLONE_CLEAR_SIGHAND 0x100000000ULL +#define LINUX_CLONE_INTO_CGROUP 0x200000000ULL +#define LINUX_CLONE_NEWTIME 0x00000080 + +#define LINUX_CLONE_LEGACY_FLAGS 0xffffffffULL + #define LINUX_CSIGNAL 0x000000ff /* @@ -85,6 +92,8 @@ struct l_clone_args { l_ulong tls; }; +#define LINUX_CLONE_ARGS_SIZE_VER0 64 + int linux_set_upcall(struct thread *, register_t); int linux_set_cloned_tls(struct thread *, void *); void linux_thread_detach(struct thread *); diff --git a/sys/compat/linux/linux_misc.h b/sys/compat/linux/linux_misc.h index 80f6b8a58e81..ceb140d3da75 100644 --- a/sys/compat/linux/linux_misc.h +++ b/sys/compat/linux/linux_misc.h @@ -33,6 +33,8 @@ #include +#define LINUX_MAX_PID_NS_LEVEL 32 + /* bits per mask */ #define LINUX_NFDBITS sizeof(l_fd_mask) * 8 diff --git a/sys/i386/linux/syscalls.master b/sys/i386/linux/syscalls.master index aecb852e21c7..27bbca9e65e7 100644 --- a/sys/i386/linux/syscalls.master +++ b/sys/i386/linux/syscalls.master @@ -2502,7 +2502,10 @@ int linux_pidfd_open(void); } 435 AUE_NULL STD { - int linux_clone3(void); + int linux_clone3( + struct l_user_clone_args *uargs, + l_size_t usize + ); } 436 AUE_NULL STD { int linux_close_range(void);