From 7b445033ff88f694d81ec39d7972f97867c40cf5 Mon Sep 17 00:00:00 2001 From: Konstantin Belousov Date: Sun, 10 May 2015 09:00:40 +0000 Subject: [PATCH] On exec, single-threading must be enforced before arguments space is allocated from exec_map. If many threads try to perform execve(2) in parallel, the exec map is exhausted and some threads sleep uninterruptible waiting for the map space. Then, the thread which won the race for the space allocation, cannot single-thread the process, causing deadlock. Reported and tested by: pho (previous version) Sponsored by: The FreeBSD Foundation MFC after: 2 weeks --- sys/amd64/linux32/linux32_machdep.c | 10 ++- sys/compat/freebsd32/freebsd32_misc.c | 10 +++ sys/compat/svr4/svr4_misc.c | 14 ++++ sys/i386/ibcs2/ibcs2_misc.c | 14 ++++ sys/i386/linux/linux_machdep.c | 14 +++- sys/kern/kern_exec.c | 105 ++++++++++++++------------ sys/sys/imgact.h | 3 + 7 files changed, 119 insertions(+), 51 deletions(-) diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c index 2b955d0ff8d7..155e90b1f4ba 100644 --- a/sys/amd64/linux32/linux32_machdep.c +++ b/sys/amd64/linux32/linux32_machdep.c @@ -137,6 +137,7 @@ int linux_execve(struct thread *td, struct linux_execve_args *args) { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; @@ -147,12 +148,17 @@ linux_execve(struct thread *td, struct linux_execve_args *args) printf(ARGS(execve, "%s"), path); #endif + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, args->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); - if (error == 0) + if (error == 0) { /* Linux process can execute FreeBSD one, do not attempt * to create emuldata for such process using * linux_proc_init, this leads to a panic on KASSERT @@ -160,6 +166,8 @@ linux_execve(struct thread *td, struct linux_execve_args *args) */ if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) error = linux_proc_init(td, 0, 0); + } + post_execve(td, error, oldvmspace); return (error); } diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 67210ff59de4..7a33d619ba86 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -402,12 +402,17 @@ int freebsd32_execve(struct thread *td, struct freebsd32_execve_args *uap) { struct image_args eargs; + struct vmspace *oldvmspace; int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = freebsd32_exec_copyin_args(&eargs, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } @@ -415,14 +420,19 @@ int freebsd32_fexecve(struct thread *td, struct freebsd32_fexecve_args *uap) { struct image_args eargs; + struct vmspace *oldvmspace; int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = freebsd32_exec_copyin_args(&eargs, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { eargs.fd = uap->fd; error = kern_execve(td, &eargs, NULL); } + post_execve(td, error, oldvmspace); return (error); } diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index cef1b488e673..c0da170a5e3b 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -167,15 +167,22 @@ svr4_sys_execv(td, uap) struct svr4_sys_execv_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, NULL); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } @@ -185,16 +192,23 @@ svr4_sys_execve(td, uap) struct svr4_sys_execve_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } diff --git a/sys/i386/ibcs2/ibcs2_misc.c b/sys/i386/ibcs2/ibcs2_misc.c index d81cfeeb97f9..2c2cae416c50 100644 --- a/sys/i386/ibcs2/ibcs2_misc.c +++ b/sys/i386/ibcs2/ibcs2_misc.c @@ -200,15 +200,22 @@ ibcs2_execv(td, uap) struct ibcs2_execv_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, NULL); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } @@ -218,16 +225,23 @@ ibcs2_execve(td, uap) struct ibcs2_execve_args *uap; { struct image_args eargs; + struct vmspace *oldvmspace; char *path; int error; CHECKALTEXIST(td, uap->path, &path); + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(path, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, uap->argp, uap->envp); free(path, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); + post_execve(td, error, oldvmspace); return (error); } diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index d9c28d9483da..effc32ae9306 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -126,9 +126,10 @@ bsd_to_linux_sigaltstack(int bsa) int linux_execve(struct thread *td, struct linux_execve_args *args) { - int error; - char *newpath; struct image_args eargs; + struct vmspace *oldvmspace; + char *newpath; + int error; LCONVPATHEXIST(td, args->path, &newpath); @@ -137,12 +138,17 @@ linux_execve(struct thread *td, struct linux_execve_args *args) printf(ARGS(execve, "%s"), newpath); #endif + error = pre_execve(td, &oldvmspace); + if (error != 0) { + free(newpath, M_TEMP); + return (error); + } error = exec_copyin_args(&eargs, newpath, UIO_SYSSPACE, args->argp, args->envp); free(newpath, M_TEMP); if (error == 0) error = kern_execve(td, &eargs, NULL); - if (error == 0) + if (error == 0) { /* linux process can exec fbsd one, dont attempt * to create emuldata for such process using * linux_proc_init, this leads to a panic on KASSERT @@ -150,6 +156,8 @@ linux_execve(struct thread *td, struct linux_execve_args *args) */ if (SV_PROC_ABI(td->td_proc) == SV_ABI_LINUX) error = linux_proc_init(td, 0, 0); + } + post_execve(td, error, oldvmspace); return (error); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 9d893f8e445f..8668f0d0d030 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -193,21 +193,20 @@ struct execve_args { #endif int -sys_execve(td, uap) - struct thread *td; - struct execve_args /* { - char *fname; - char **argv; - char **envv; - } */ *uap; +sys_execve(struct thread *td, struct execve_args *uap) { - int error; struct image_args args; + struct vmspace *oldvmspace; + int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, NULL); + post_execve(td, error, oldvmspace); return (error); } @@ -221,15 +220,20 @@ struct fexecve_args { int sys_fexecve(struct thread *td, struct fexecve_args *uap) { - int error; struct image_args args; + struct vmspace *oldvmspace; + int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = exec_copyin_args(&args, NULL, UIO_SYSSPACE, uap->argv, uap->envv); if (error == 0) { args.fd = uap->fd; error = kern_execve(td, &args, NULL); } + post_execve(td, error, oldvmspace); return (error); } @@ -243,65 +247,56 @@ struct __mac_execve_args { #endif int -sys___mac_execve(td, uap) - struct thread *td; - struct __mac_execve_args /* { - char *fname; - char **argv; - char **envv; - struct mac *mac_p; - } */ *uap; +sys___mac_execve(struct thread *td, struct __mac_execve_args *uap) { #ifdef MAC - int error; struct image_args args; + struct vmspace *oldvmspace; + int error; + error = pre_execve(td, &oldvmspace); + if (error != 0) + return (error); error = exec_copyin_args(&args, uap->fname, UIO_USERSPACE, uap->argv, uap->envv); if (error == 0) error = kern_execve(td, &args, uap->mac_p); + post_execve(td, error, oldvmspace); return (error); #else return (ENOSYS); #endif } -/* - * XXX: kern_execve has the astonishing property of not always returning to - * the caller. If sufficiently bad things happen during the call to - * do_execve(), it can end up calling exit1(); as a result, callers must - * avoid doing anything which they might need to undo (e.g., allocating - * memory). - */ int -kern_execve(td, args, mac_p) - struct thread *td; - struct image_args *args; - struct mac *mac_p; +pre_execve(struct thread *td, struct vmspace **oldvmspace) { - struct proc *p = td->td_proc; - struct vmspace *oldvmspace; + struct proc *p; int error; - AUDIT_ARG_ARGV(args->begin_argv, args->argc, - args->begin_envv - args->begin_argv); - AUDIT_ARG_ENVV(args->begin_envv, args->envc, - args->endp - args->begin_envv); - if (p->p_flag & P_HADTHREADS) { + KASSERT(td == curthread, ("non-current thread %p", td)); + error = 0; + p = td->td_proc; + if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); - if (thread_single(p, SINGLE_BOUNDARY)) { - PROC_UNLOCK(p); - exec_free_args(args); - return (ERESTART); /* Try again later. */ - } + if (thread_single(p, SINGLE_BOUNDARY) != 0) + error = ERESTART; PROC_UNLOCK(p); } + KASSERT(error != 0 || (td->td_pflags & TDP_EXECVMSPC) == 0, + ("nested execve")); + *oldvmspace = p->p_vmspace; + return (error); +} - KASSERT((td->td_pflags & TDP_EXECVMSPC) == 0, ("nested execve")); - oldvmspace = td->td_proc->p_vmspace; - error = do_execve(td, args, mac_p); +void +post_execve(struct thread *td, int error, struct vmspace *oldvmspace) +{ + struct proc *p; - if (p->p_flag & P_HADTHREADS) { + KASSERT(td == curthread, ("non-current thread %p", td)); + p = td->td_proc; + if ((p->p_flag & P_HADTHREADS) != 0) { PROC_LOCK(p); /* * If success, we upgrade to SINGLE_EXIT state to @@ -314,13 +309,29 @@ kern_execve(td, args, mac_p) PROC_UNLOCK(p); } if ((td->td_pflags & TDP_EXECVMSPC) != 0) { - KASSERT(td->td_proc->p_vmspace != oldvmspace, + KASSERT(p->p_vmspace != oldvmspace, ("oldvmspace still used")); vmspace_free(oldvmspace); td->td_pflags &= ~TDP_EXECVMSPC; } +} - return (error); +/* + * XXX: kern_execve has the astonishing property of not always returning to + * the caller. If sufficiently bad things happen during the call to + * do_execve(), it can end up calling exit1(); as a result, callers must + * avoid doing anything which they might need to undo (e.g., allocating + * memory). + */ +int +kern_execve(struct thread *td, struct image_args *args, struct mac *mac_p) +{ + + AUDIT_ARG_ARGV(args->begin_argv, args->argc, + args->begin_envv - args->begin_argv); + AUDIT_ARG_ENVV(args->begin_envv, args->envc, + args->endp - args->begin_envv); + return (do_execve(td, args, mac_p)); } /* diff --git a/sys/sys/imgact.h b/sys/sys/imgact.h index ac88a149cb81..4a5ce9c4ac07 100644 --- a/sys/sys/imgact.h +++ b/sys/sys/imgact.h @@ -86,6 +86,7 @@ struct image_params { #ifdef _KERNEL struct sysentvec; struct thread; +struct vmspace; #define IMGACT_CORE_COMPRESS 0x01 @@ -98,6 +99,8 @@ void exec_setregs(struct thread *, struct image_params *, u_long); int exec_shell_imgact(struct image_params *); int exec_copyin_args(struct image_args *, char *, enum uio_seg, char **, char **); +int pre_execve(struct thread *td, struct vmspace **oldvmspace); +void post_execve(struct thread *td, int error, struct vmspace *oldvmspace); #endif #endif /* !_SYS_IMGACT_H_ */