Add the linux 2.6.x stuff (not used by default!):

- TLS - complete
 - pid/tid mangling - complete
 - thread area - complete
 - futexes - complete with issues
 - clone() extension - complete with some possible minor issues
 - mq*/timer*/clock* stuff - complete but untested, and the mq* stuff is
   disabled when not built as part of a kernel with native FreeBSD mq*
   support (module support for this will come later)

Tested with:
 - linux-firefox - works, tested
 - linux-opera - works, tested
 - linux-realplay - doesn't work, issue with futexes
 - linux-skype - doesn't work, issue with futexes
 - linux-rt2-demo - works, tested
 - linux-acroread - doesn't work, unknown reason (coredump) and sometimes
   an issue with futexes
 - various unix utilities in linux-base-gentoo3 and linux-base-fc4:
   everything tried worked

On amd64, not everything that works on i386 is supported yet; the catch-up is
planned for later, once the remaining bugs in the new functions are fixed.

To test this new stuff, you have to run
	sysctl compat.linux.osrelease=2.6.16
to switch back use
	sysctl compat.linux.osrelease=2.4.2

Don't switch while a Linux program is running; strange things may or may not
happen.

Sponsored by:			Google SoC 2006
Submitted by:			rdivacky
Some suggestions/help by:	jhb, kib, manu@NetBSD.org, netchild
This commit is contained in:
Alexander Leidinger 2006-08-15 12:54:30 +00:00
parent c107650561
commit 9b44bfc556
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=161310
13 changed files with 771 additions and 69 deletions

View File

@ -34,6 +34,10 @@
#define _AMD64_LINUX_LINUX_H_
#include <sys/signal.h> /* for sigval union */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <amd64/linux32/linux32_syscall.h>
@ -495,6 +499,7 @@ struct l_rt_sigframe {
extern int bsd_to_linux_signal[];
extern int linux_to_bsd_signal[];
extern struct sysentvec elf_linux_sysvec;
/*
* Pluggable ioctl handlers
@ -527,6 +532,11 @@ int linux_ioctl_unregister_handler(struct linux_ioctl_handler *h);
#define LINUX_O_NDELAY LINUX_O_NONBLOCK
#define LINUX_O_SYNC 010000
#define LINUX_FASYNC 020000
#define LINUX_O_DIRECT 040000 /* direct disk access hint */
#define LINUX_O_LARGEFILE 0100000
#define LINUX_O_DIRECTORY 0200000 /* must be a directory */
#define LINUX_O_NOFOLLOW 0400000 /* don't follow links */
#define LINUX_O_NOATIME 01000000
#define LINUX_F_DUPFD 0
#define LINUX_F_GETFD 1
@ -737,4 +747,99 @@ struct l_pollfd {
l_short revents;
} __packed;
/*
 * Linux user-space description of a segment.  It is the input of the
 * LDT_entry_a()/LDT_entry_b()/LDT_empty() macros below, which pack it into
 * the two words of a machine segment descriptor.
 * The field order and bit-field widths are part of the Linux ABI layout;
 * do not reorder them.
 */
struct l_user_desc {
/* descriptor slot the caller asks for / is told about */
l_uint entry_number;
/* segment base; split into three pieces by LDT_entry_a()/LDT_entry_b() */
l_uint base_addr;
/* segment limit; only the low 20 bits are packed into the descriptor */
l_uint limit;
/* copied to bit ENTRY_B_SEG32BIT of word b */
l_uint seg_32bit:1;
/* copied to bits ENTRY_B_CONTENTS.. of word b (two bits wide) */
l_uint contents:2;
/* inverted, then stored at bit ENTRY_B_READ_EXEC_ONLY of word b */
l_uint read_exec_only:1;
/* copied to bit ENTRY_B_LIMIT of word b */
l_uint limit_in_pages:1;
/* inverted, then stored at bit ENTRY_B_SEG_NOT_PRESENT of word b */
l_uint seg_not_present:1;
/* copied to bit ENTRY_B_USEABLE of word b */
l_uint useable:1;
};
/*
 * Raw image of a machine segment descriptor as two 32-bit words: 'a' is the
 * low word, 'b' the high word.  The GET_*() macros below decode it.
 *
 * The words must be exactly 32 bits wide so that the structure is 8 bytes and
 * can be memcpy()'d to/from a hardware descriptor.  'unsigned long' is 64 bits
 * on amd64, which would make the structure 16 bytes and move 'b' away from
 * the second descriptor word, so fixed-width integers are used here.
 */
struct l_desc_struct {
	uint32_t a, b;
};
/* Mask selecting the low 16 bits of a 32-bit word. */
#define LINUX_LOWERWORD 0x0000ffff
/*
 * These macros do the same thing as those in Linux's
 * include/asm-um/ldt-i386.h: they convert a Linux user-space descriptor
 * (struct l_user_desc) into the two words of a machine descriptor.
 */
/*
 * Word a: low 16 bits of the base in the upper half, low 16 bits of the
 * limit in the lower half.
 */
#define LDT_entry_a(info) \
((((info)->base_addr & LINUX_LOWERWORD) << 16) | ((info)->limit & LINUX_LOWERWORD))
/*
 * Bit positions inside word b.  ENTRY_B_BASE_ADDR is not a position but the
 * shift that moves base bits 16-23 down to bits 0-7.
 */
#define ENTRY_B_READ_EXEC_ONLY 9
#define ENTRY_B_CONTENTS 10
#define ENTRY_B_SEG_NOT_PRESENT 15
#define ENTRY_B_BASE_ADDR 16
#define ENTRY_B_USEABLE 20
#define ENTRY_B_SEG32BIT 22
#define ENTRY_B_LIMIT 23
/*
 * Word b: base bits 24-31 and limit bits 16-19 stay in place, base bits
 * 16-23 go to bits 0-7, and the flag bit-fields are shifted to the
 * ENTRY_B_* positions.  seg_not_present and read_exec_only are stored
 * inverted (the descriptor bit is set when the field is 0).
 * The constant 0x7000 sets bits 12-14 unconditionally.
 * NOTE(review): presumably the "code/data segment" bit plus DPL 3 - confirm
 * against the descriptor format.
 */
#define LDT_entry_b(info) \
(((info)->base_addr & 0xff000000) | \
((info)->limit & 0xf0000) | \
((info)->contents << ENTRY_B_CONTENTS) | \
(((info)->seg_not_present == 0) << ENTRY_B_SEG_NOT_PRESENT) | \
(((info)->base_addr & 0x00ff0000) >> ENTRY_B_BASE_ADDR) | \
(((info)->read_exec_only == 0) << ENTRY_B_READ_EXEC_ONLY) | \
((info)->seg_32bit << ENTRY_B_SEG32BIT) | \
((info)->useable << ENTRY_B_USEABLE) | \
((info)->limit_in_pages << ENTRY_B_LIMIT) | 0x7000)
/*
 * True when the user descriptor has zero base, limit and contents, is marked
 * not-present and read/exec-only, and has every other flag clear.
 */
#define LDT_empty(info) (\
(info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
(info)->seg_not_present == 1 && \
(info)->read_exec_only == 1 && \
(info)->seg_32bit == 0 && \
(info)->limit_in_pages == 0 && \
(info)->useable == 0 )
/*
 * Macros for the opposite direction: they pull individual fields out of a
 * raw descriptor (struct l_desc_struct) and do the same as those in Linux's
 * arch/i386/kernel/process.c.
 */
/* Reassemble the 32-bit base from its three pieces in words a and b. */
#define GET_BASE(desc) ( \
(((desc)->a >> 16) & LINUX_LOWERWORD) | \
(((desc)->b << 16) & 0x00ff0000) | \
( (desc)->b & 0xff000000) )
/* Reassemble the 20-bit limit from word a (bits 0-15) and word b (bits 16-19). */
#define GET_LIMIT(desc) ( \
((desc)->a & LINUX_LOWERWORD) | \
((desc)->b & 0xf0000) )
/*
 * Single-flag extractors.  GET_WRITABLE and GET_PRESENT return the raw
 * descriptor bits, i.e. the inverse of read_exec_only / seg_not_present.
 */
#define GET_32BIT(desc) (((desc)->b >> ENTRY_B_SEG32BIT) & 1)
#define GET_CONTENTS(desc) (((desc)->b >> ENTRY_B_CONTENTS) & 3)
#define GET_WRITABLE(desc) (((desc)->b >> ENTRY_B_READ_EXEC_ONLY) & 1)
#define GET_LIMIT_PAGES(desc) (((desc)->b >> ENTRY_B_LIMIT) & 1)
#define GET_PRESENT(desc) (((desc)->b >> ENTRY_B_SEG_NOT_PRESENT) & 1)
#define GET_USEABLE(desc) (((desc)->b >> ENTRY_B_USEABLE) & 1)
/*
 * Linux clock identifiers.
 * NOTE(review): presumably the clockid_t values of the Linux ABI used by the
 * clock_*() syscalls; the code consuming them is not visible here - confirm.
 */
#define LINUX_CLOCK_REALTIME 0
#define LINUX_CLOCK_MONOTONIC 1
#define LINUX_CLOCK_PROCESS_CPUTIME_ID 2
#define LINUX_CLOCK_THREAD_CPUTIME_ID 3
#define LINUX_CLOCK_REALTIME_HR 4
#define LINUX_CLOCK_MONOTONIC_HR 5
/* Linux-side timer and message queue handle types. */
typedef int l_timer_t;
typedef int l_mqd_t;
/*
 * Flag bits of the clone() 'flags' argument.  The low byte of that argument
 * is not a flag: linux_clone() extracts it as the exit signal.
 */
#define CLONE_VM 0x100
#define CLONE_FS 0x200
#define CLONE_FILES 0x400
#define CLONE_SIGHAND 0x800
#define CLONE_PID 0x1000 /* this flag does not exist in linux anymore */
#define CLONE_PARENT 0x00008000
#define CLONE_THREAD 0x10000
#define CLONE_SETTLS 0x80000
#define CLONE_CHILD_CLEARTID 0x00200000
#define CLONE_CHILD_SETTID 0x01000000
#define CLONE_PARENT_SETTID 0x00100000
/* The four sharing flags taken together. */
#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
#endif /* !_AMD64_LINUX_LINUX_H_ */

View File

@ -72,19 +72,13 @@ DUMMY(epoll_create);
DUMMY(epoll_ctl);
DUMMY(epoll_wait);
DUMMY(remap_file_pages);
DUMMY(set_tid_address);
DUMMY(timer_create);
DUMMY(timer_settime);
DUMMY(timer_gettime);
DUMMY(timer_getoverrun);
DUMMY(timer_delete);
DUMMY(clock_settime);
DUMMY(clock_gettime);
DUMMY(clock_getres);
DUMMY(clock_nanosleep);
DUMMY(statfs64);
DUMMY(fstatfs64);
DUMMY(tgkill);
DUMMY(utimes);
DUMMY(fadvise64_64);
DUMMY(mbind);

View File

@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/imgact.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
@ -472,12 +473,6 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
return (0);
}
#define CLONE_VM 0x100
#define CLONE_FS 0x200
#define CLONE_FILES 0x400
#define CLONE_SIGHAND 0x800
#define CLONE_PID 0x1000
int
linux_clone(struct thread *td, struct linux_clone_args *args)
{
@ -491,14 +486,9 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
printf(ARGS(clone, "flags %x, stack %x"),
(unsigned int)(uintptr_t)args->flags,
(unsigned int)(uintptr_t)args->stack);
if (args->flags & CLONE_PID)
printf(LMSG("CLONE_PID not yet supported"));
}
#endif
if (!args->stack)
return (EINVAL);
exit_signal = args->flags & 0x000000ff;
if (exit_signal >= LINUX_NSIG)
return (EINVAL);
@ -522,7 +512,11 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
p2->p_sigparent = exit_signal;
PROC_UNLOCK(p2);
td2 = FIRST_THREAD_IN_PROC(p2);
td2->td_frame->tf_rsp = PTROUT(args->stack);
/* in a case of stack = NULL we are supposed to COW calling process stack
* this is what normal fork() does so we just keep the tf_rsp arg intact
*/
if (args->stack)
td2->td_frame->tf_rsp = PTROUT(args->stack);
#ifdef DEBUG
if (ldebug(clone))

View File

@ -993,7 +993,7 @@ linux32_fixlimits(struct proc *p)
struct sysentvec elf_linux_sysvec = {
LINUX_SYS_MAXSYSCALL,
linux_sysent,
0xff,
0,
LINUX_SIGTBLSZ,
bsd_to_linux_signal,
ELAST + 1,

View File

@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
#include <posix4/sched.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
@ -93,6 +94,9 @@ __FBSDID("$FreeBSD$");
#define BSD_TO_LINUX_SIGNAL(sig) \
(((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig)
extern struct sx emul_shared_lock;
extern struct sx emul_lock;
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
@ -1330,11 +1334,69 @@ linux_reboot(struct thread *td, struct linux_reboot_args *args)
/*
 * getpid() for Linux processes.
 *
 * Reports the group pid recorded in the shared part of the caller's
 * emulation data rather than the FreeBSD pid of the calling process.
 * Always returns 0; the pid is delivered through td_retval[0].
 */
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{
	struct proc *self = td->td_proc;
	struct linux_emuldata *emd;

	/*
	 * NOTE(review): em_find(..., EMUL_UNLOCKED) appears to return with
	 * emul_lock held, which is why it is dropped below - confirm.
	 */
	emd = em_find(self, EMUL_UNLOCKED);
	KASSERT(emd != NULL, ("getpid: emuldata not found.\n"));

	td->td_retval[0] = emd->shared->group_pid;
	EMUL_UNLOCK(&emul_lock);

	return (0);
}
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
#ifdef DEBUG
if (ldebug(gettid))
printf(ARGS(gettid, ""));
#endif
td->td_retval[0] = td->td_proc->p_pid;
return (0);
}
/*
 * getppid() for Linux processes.
 *
 * Looks up the leader of the caller's thread group and reports that
 * process's parent.  If the parent is itself a Linux process, its group pid
 * is reported; otherwise its plain FreeBSD pid is.
 * Always returns 0; the pid is delivered through td_retval[0].  When the
 * group leader cannot be found, td_retval[0] is left untouched.
 */
int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{
	struct linux_emuldata *em;
	struct proc *p, *pp;

	/*
	 * emul_lock is released at the end of this function, so it is held
	 * from here on; every return path has to drop it.
	 */
	em = em_find(td->td_proc, EMUL_UNLOCKED);
	KASSERT(em != NULL, ("getppid: process emuldata not found.\n"));

	/* find the group leader */
	p = pfind(em->shared->group_pid);
	if (p == NULL) {
#ifdef DEBUG
		printf(LMSG("parent process not found.\n"));
#endif
		/* Do not leak emul_lock on this early return. */
		EMUL_UNLOCK(&emul_lock);
		return (0);
	}

	/* pfind() handed us p locked: lock the parent, then let go of p. */
	pp = p->p_pptr;		/* switch to parent */
	PROC_LOCK(pp);
	PROC_UNLOCK(p);

	/* if its also linux process */
	if (pp->p_sysent == &elf_linux_sysvec) {
		/* emul_lock is already held, hence EMUL_LOCKED. */
		em = em_find(pp, EMUL_LOCKED);
		KASSERT(em != NULL, ("getppid: parent emuldata not found.\n"));
		td->td_retval[0] = em->shared->group_pid;
	} else
		td->td_retval[0] = pp->p_pid;

	EMUL_UNLOCK(&emul_lock);
	PROC_UNLOCK(pp);

	return (0);
}
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{
@ -1394,3 +1456,39 @@ linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
args->len, 0, 0));
}
/*
 * exit_group(): terminate every member of the caller's thread group.
 *
 * Walks the thread list hanging off the shared emulation data, sends SIGKILL
 * to every member except the caller, and then exits the caller itself with
 * the requested exit code.
 */
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{
	struct linux_emuldata *em, *td_em, *tmp_em;
	struct proc *sp;

#ifdef DEBUG
	if (ldebug(exit_group))
		printf(ARGS(exit_group, "%i"), args->error_code);
#endif

	td_em = em_find(td->td_proc, EMUL_UNLOCKED);
	KASSERT(td_em != NULL, ("exit_group: emuldata not found.\n"));

	EMUL_SHARED_RLOCK(&emul_shared_lock);
	LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) {
		/* The caller exits through exit1() below, not via a signal. */
		if (em->pid == td_em->pid)
			continue;

		sp = pfind(em->pid);
		/*
		 * The process may already be gone while its entry is still on
		 * the list; there is nothing to signal (or unlock) then.
		 */
		if (sp == NULL)
			continue;
		psignal(sp, SIGKILL);
		PROC_UNLOCK(sp);
#ifdef DEBUG
		printf(LMSG("linux_sys_exit_group: kill PID %d\n"), em->pid);
#endif
	}
	EMUL_SHARED_RUNLOCK(&emul_shared_lock);
	EMUL_UNLOCK(&emul_lock);

	exit1(td, W_EXITCODE(args->error_code,0));

	return (0);
}

View File

@ -49,6 +49,10 @@ __FBSDID("$FreeBSD$");
#endif
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
extern struct sx emul_shared_lock;
extern struct sx emul_lock;
void
linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
@ -447,3 +451,57 @@ linux_kill(struct thread *td, struct linux_kill_args *args)
tmp.pid = args->pid;
return (kill(td, &tmp));
}
/*
 * tgkill(): send signal 'sig' to process 'pid', but only if that process
 * belongs to thread group 'tgid'.
 *
 * A tgid of -1 skips the group check and behaves like linux_kill().
 * Returns ESRCH when the target does not exist, is not a Linux process, has
 * no emulation data, or belongs to a different thread group; otherwise
 * returns whatever linux_kill() returns.
 */
int
linux_tgkill(struct thread *td, struct linux_tgkill_args *args)
{
	struct linux_emuldata *em;
	struct linux_kill_args ka;
	struct proc *p;

#ifdef DEBUG
	if (ldebug(tgkill))
		printf(ARGS(tgkill, "%d, %d, %d"), args->tgid, args->pid, args->sig);
#endif

	ka.pid = args->pid;
	ka.signum = args->sig;

	if (args->tgid == -1)
		return linux_kill(td, &ka);

	if ((p = pfind(args->pid)) == NULL)
		return ESRCH;

	/* pfind() returned the process locked; unlock it on every path. */
	if (p->p_sysent != &elf_linux_sysvec) {
		PROC_UNLOCK(p);
		return ESRCH;
	}

	PROC_UNLOCK(p);

	em = em_find(p, EMUL_UNLOCKED);
	if (em == NULL) {
#ifdef DEBUG
		printf("emuldata not found in tgkill.\n");
#endif
		/*
		 * NOTE(review): if em_find(..., EMUL_UNLOCKED) takes emul_lock
		 * even when it returns NULL, this path must release it too -
		 * confirm against em_find().
		 */
		return ESRCH;
	}

	if (em->shared->group_pid != args->tgid) {
		/* Do not leak emul_lock when the thread group does not match. */
		EMUL_UNLOCK(&emul_lock);
		return ESRCH;
	}

	EMUL_UNLOCK(&emul_lock);

	return linux_kill(td, &ka);
}
/*
 * tkill(): implemented by handing the request to linux_kill().  The tkill
 * argument structure is reinterpreted as a kill argument structure, so this
 * relies on both having the same layout.
 */
int
linux_tkill(struct thread *td, struct linux_tkill_args *args)
{
	struct linux_kill_args *kill_args;

#ifdef DEBUG
	if (ldebug(tkill))
		printf(ARGS(tkill, "%i, %i"), args->tid, args->sig);
#endif

	kill_args = (struct linux_kill_args *)args;
	return (linux_kill(td, kill_args));
}

View File

@ -32,6 +32,8 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_compat.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/lock.h>
@ -47,6 +49,11 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#include <compat/linux/linux_util.h>
#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#else
#include <machine/../linux/linux.h>
#endif
const char linux_emul_path[] = "/compat/linux";
@ -85,8 +92,6 @@ linux_msg(const struct thread *td, const char *fmt, ...)
printf("\n");
}
MALLOC_DECLARE(M_LINUX);
struct device_element
{
TAILQ_ENTRY(device_element) list;

View File

@ -233,7 +233,9 @@ amd64/linux32/linux32_locore.s optional compat_linux32 \
amd64/linux32/linux32_machdep.c optional compat_linux32
amd64/linux32/linux32_sysent.c optional compat_linux32
amd64/linux32/linux32_sysvec.c optional compat_linux32
compat/linux/linux_emul.c optional compat_linux32
compat/linux/linux_file.c optional compat_linux32
compat/linux/linux_futex.c optional compat_linux32
compat/linux/linux_getcwd.c optional compat_linux32
compat/linux/linux_ioctl.c optional compat_linux32
compat/linux/linux_ipc.c optional compat_linux32
@ -243,6 +245,7 @@ compat/linux/linux_signal.c optional compat_linux32
compat/linux/linux_socket.c optional compat_linux32
compat/linux/linux_stats.c optional compat_linux32
compat/linux/linux_sysctl.c optional compat_linux32
compat/linux/linux_time.c optional compat_linux32
compat/linux/linux_uid16.c optional compat_linux32
compat/linux/linux_util.c optional compat_linux32
dev/amr/amr_linux.c optional compat_linux32 amr

View File

@ -85,7 +85,9 @@ rr232x_lib.o optional rr232x \
#
compat/linprocfs/linprocfs.c optional linprocfs
compat/linsysfs/linsysfs.c optional linsysfs
compat/linux/linux_emul.c optional compat_linux
compat/linux/linux_file.c optional compat_linux
compat/linux/linux_futex.c optional compat_linux
compat/linux/linux_getcwd.c optional compat_linux
compat/linux/linux_ioctl.c optional compat_linux
compat/linux/linux_ipc.c optional compat_linux
@ -95,6 +97,7 @@ compat/linux/linux_signal.c optional compat_linux
compat/linux/linux_socket.c optional compat_linux
compat/linux/linux_stats.c optional compat_linux
compat/linux/linux_sysctl.c optional compat_linux
compat/linux/linux_time.c optional compat_linux
compat/linux/linux_uid16.c optional compat_linux
compat/linux/linux_util.c optional compat_linux
compat/ndis/kern_ndis.c optional ndisapi pci

View File

@ -32,6 +32,10 @@
#define _I386_LINUX_LINUX_H_
#include <sys/signal.h> /* for sigval union */
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <i386/linux/linux_syscall.h>
@ -706,4 +710,99 @@ struct l_pollfd {
l_short revents;
};
/*
 * Linux user-space description of a segment.  It is what
 * linux_set_thread_area()/linux_get_thread_area() copy in and out, and what
 * clone() reads when CLONE_SETTLS is given.  The
 * LDT_entry_a()/LDT_entry_b()/LDT_empty() macros below pack it into the two
 * words of a machine segment descriptor.
 * The field order and bit-field widths are part of the Linux ABI layout;
 * do not reorder them.
 */
struct l_user_desc {
/* descriptor slot the caller asks for / is told about */
l_uint entry_number;
/* segment base; split into three pieces by LDT_entry_a()/LDT_entry_b() */
l_uint base_addr;
/* segment limit; only the low 20 bits are packed into the descriptor */
l_uint limit;
/* copied to bit ENTRY_B_SEG32BIT of word b */
l_uint seg_32bit:1;
/* copied to bits ENTRY_B_CONTENTS.. of word b (two bits wide) */
l_uint contents:2;
/* inverted, then stored at bit ENTRY_B_READ_EXEC_ONLY of word b */
l_uint read_exec_only:1;
/* copied to bit ENTRY_B_LIMIT of word b */
l_uint limit_in_pages:1;
/* inverted, then stored at bit ENTRY_B_SEG_NOT_PRESENT of word b */
l_uint seg_not_present:1;
/* copied to bit ENTRY_B_USEABLE of word b */
l_uint useable:1;
};
/*
 * Raw image of a machine segment descriptor as two words: 'a' is the low
 * word, 'b' the high word.  linux_get_thread_area() memcpy()s a
 * struct segment_descriptor into it and decodes it with the GET_*() macros.
 * NOTE(review): this relies on 'unsigned long' being 32 bits wide, as it is
 * on i386.
 */
struct l_desc_struct {
unsigned long a,b;
};
/* Mask selecting the low 16 bits of a 32-bit word. */
#define LINUX_LOWERWORD 0x0000ffff
/*
 * These macros do the same thing as those in Linux's
 * include/asm-um/ldt-i386.h: they convert a Linux user-space descriptor
 * (struct l_user_desc) into the two words of a machine descriptor.
 */
/*
 * Word a: low 16 bits of the base in the upper half, low 16 bits of the
 * limit in the lower half.
 */
#define LDT_entry_a(info) \
((((info)->base_addr & LINUX_LOWERWORD) << 16) | ((info)->limit & LINUX_LOWERWORD))
/*
 * Bit positions inside word b.  ENTRY_B_BASE_ADDR is not a position but the
 * shift that moves base bits 16-23 down to bits 0-7.
 */
#define ENTRY_B_READ_EXEC_ONLY 9
#define ENTRY_B_CONTENTS 10
#define ENTRY_B_SEG_NOT_PRESENT 15
#define ENTRY_B_BASE_ADDR 16
#define ENTRY_B_USEABLE 20
#define ENTRY_B_SEG32BIT 22
#define ENTRY_B_LIMIT 23
/*
 * Word b: base bits 24-31 and limit bits 16-19 stay in place, base bits
 * 16-23 go to bits 0-7, and the flag bit-fields are shifted to the
 * ENTRY_B_* positions.  seg_not_present and read_exec_only are stored
 * inverted (the descriptor bit is set when the field is 0).
 * The constant 0x7000 sets bits 12-14 unconditionally.
 * NOTE(review): presumably the "code/data segment" bit plus DPL 3 - confirm
 * against the descriptor format.
 */
#define LDT_entry_b(info) \
(((info)->base_addr & 0xff000000) | \
((info)->limit & 0xf0000) | \
((info)->contents << ENTRY_B_CONTENTS) | \
(((info)->seg_not_present == 0) << ENTRY_B_SEG_NOT_PRESENT) | \
(((info)->base_addr & 0x00ff0000) >> ENTRY_B_BASE_ADDR) | \
(((info)->read_exec_only == 0) << ENTRY_B_READ_EXEC_ONLY) | \
((info)->seg_32bit << ENTRY_B_SEG32BIT) | \
((info)->useable << ENTRY_B_USEABLE) | \
((info)->limit_in_pages << ENTRY_B_LIMIT) | 0x7000)
/*
 * True when the user descriptor has zero base, limit and contents, is marked
 * not-present and read/exec-only, and has every other flag clear.
 * linux_set_thread_area() installs an all-zero descriptor in that case.
 */
#define LDT_empty(info) (\
(info)->base_addr == 0 && \
(info)->limit == 0 && \
(info)->contents == 0 && \
(info)->seg_not_present == 1 && \
(info)->read_exec_only == 1 && \
(info)->seg_32bit == 0 && \
(info)->limit_in_pages == 0 && \
(info)->useable == 0 )
/*
 * Macros for the opposite direction: they pull individual fields out of a
 * raw descriptor (struct l_desc_struct) and do the same as those in Linux's
 * arch/i386/kernel/process.c.
 */
/* Reassemble the 32-bit base from its three pieces in words a and b. */
#define GET_BASE(desc) ( \
(((desc)->a >> 16) & LINUX_LOWERWORD) | \
(((desc)->b << 16) & 0x00ff0000) | \
( (desc)->b & 0xff000000) )
/* Reassemble the 20-bit limit from word a (bits 0-15) and word b (bits 16-19). */
#define GET_LIMIT(desc) ( \
((desc)->a & LINUX_LOWERWORD) | \
((desc)->b & 0xf0000) )
/*
 * Single-flag extractors.  GET_WRITABLE and GET_PRESENT return the raw
 * descriptor bits, i.e. the inverse of read_exec_only / seg_not_present;
 * linux_get_thread_area() negates them again.
 */
#define GET_32BIT(desc) (((desc)->b >> ENTRY_B_SEG32BIT) & 1)
#define GET_CONTENTS(desc) (((desc)->b >> ENTRY_B_CONTENTS) & 3)
#define GET_WRITABLE(desc) (((desc)->b >> ENTRY_B_READ_EXEC_ONLY) & 1)
#define GET_LIMIT_PAGES(desc) (((desc)->b >> ENTRY_B_LIMIT) & 1)
#define GET_PRESENT(desc) (((desc)->b >> ENTRY_B_SEG_NOT_PRESENT) & 1)
#define GET_USEABLE(desc) (((desc)->b >> ENTRY_B_USEABLE) & 1)
/*
 * Linux clock identifiers.
 * NOTE(review): presumably the clockid_t values of the Linux ABI used by the
 * clock_*() syscalls; the code consuming them is not visible here - confirm.
 */
#define LINUX_CLOCK_REALTIME 0
#define LINUX_CLOCK_MONOTONIC 1
#define LINUX_CLOCK_PROCESS_CPUTIME_ID 2
#define LINUX_CLOCK_THREAD_CPUTIME_ID 3
#define LINUX_CLOCK_REALTIME_HR 4
#define LINUX_CLOCK_MONOTONIC_HR 5
/* Linux-side timer and message queue handle types. */
typedef int l_timer_t;
typedef int l_mqd_t;
/*
 * Flag bits of the clone() 'flags' argument.  The low byte of that argument
 * is not a flag: linux_clone() extracts it as the exit signal.
 */
#define CLONE_VM 0x100
#define CLONE_FS 0x200
#define CLONE_FILES 0x400
#define CLONE_SIGHAND 0x800
#define CLONE_PID 0x1000 /* this flag does not exist in linux anymore */
#define CLONE_PARENT 0x00008000
#define CLONE_THREAD 0x10000
#define CLONE_SETTLS 0x80000
#define CLONE_CHILD_CLEARTID 0x00200000
#define CLONE_CHILD_SETTID 0x01000000
#define CLONE_PARENT_SETTID 0x00100000
/* The four sharing flags taken together. */
#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
#endif /* !_I386_LINUX_LINUX_H_ */

View File

@ -61,7 +61,7 @@ DUMMY(rt_sigtimedwait);
DUMMY(rt_sigqueueinfo);
DUMMY(capget);
DUMMY(capset);
DUMMY(sendfile);
DUMMY(sendfile); /* different semantics */
DUMMY(truncate64);
DUMMY(setfsuid);
DUMMY(setfsgid);
@ -73,30 +73,13 @@ DUMMY(epoll_create);
DUMMY(epoll_ctl);
DUMMY(epoll_wait);
DUMMY(remap_file_pages);
DUMMY(set_tid_address);
DUMMY(timer_create);
DUMMY(timer_settime);
DUMMY(timer_gettime);
DUMMY(timer_getoverrun);
DUMMY(timer_delete);
DUMMY(clock_settime);
DUMMY(clock_gettime);
DUMMY(clock_getres);
DUMMY(clock_nanosleep);
DUMMY(statfs64);
DUMMY(fstatfs64);
DUMMY(tgkill);
DUMMY(utimes);
DUMMY(fadvise64_64);
DUMMY(mbind);
DUMMY(get_mempolicy);
DUMMY(set_mempolicy);
DUMMY(mq_open);
DUMMY(mq_unlink);
DUMMY(mq_timedsend);
DUMMY(mq_timedreceive);
DUMMY(mq_notify);
DUMMY(mq_getsetattr);
DUMMY(kexec_load);
DUMMY(waitid);
DUMMY(add_key);

View File

@ -36,13 +36,16 @@ __FBSDID("$FreeBSD$");
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/sx.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysproto.h>
#include <sys/unistd.h>
#include <sys/wait.h>
#include <machine/frame.h>
#include <machine/psl.h>
@ -58,6 +61,16 @@ __FBSDID("$FreeBSD$");
#include <compat/linux/linux_ipc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_emul.h>
#include <i386/include/pcb.h> /* needed for pcb definition in linux_set_thread_area */
#include "opt_posix.h"
extern struct sx emul_shared_lock;
extern struct sx emul_lock;
extern struct sysentvec elf32_freebsd_sysvec; /* defined in i386/i386/elf_machdep.c */
struct l_descriptor {
l_uint entry_number;
@ -122,6 +135,14 @@ linux_execve(struct thread *td, struct linux_execve_args *args)
free(newpath, M_TEMP);
if (error == 0)
error = kern_execve(td, &eargs, NULL);
if (error == 0)
/* linux process can exec fbsd one, dont attempt
* to create emuldata for such process using
* linux_proc_init, this leads to a panic on KASSERT
* because such process has p->p_emuldata == NULL
*/
if (td->td_proc->p_sysent == &elf_linux_sysvec)
error = linux_proc_init(td, 0, 0);
return (error);
}
@ -287,6 +308,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
if (td->td_retval[1] == 1)
td->td_retval[0] = 0;
error = linux_proc_init(td, td->td_retval[0], 0);
if (error)
return (error);
return (0);
}
@ -305,18 +330,12 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args)
/* Are we the child? */
if (td->td_retval[1] == 1)
td->td_retval[0] = 0;
error = linux_proc_init(td, td->td_retval[0], 0);
if (error)
return (error);
return (0);
}
#define CLONE_VM 0x100
#define CLONE_FS 0x200
#define CLONE_FILES 0x400
#define CLONE_SIGHAND 0x800
#define CLONE_PID 0x1000
#define CLONE_THREAD 0x10000
#define THREADING_FLAGS (CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND)
int
linux_clone(struct thread *td, struct linux_clone_args *args)
{
@ -324,19 +343,16 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
struct proc *p2;
struct thread *td2;
int exit_signal;
struct linux_emuldata *em;
#ifdef DEBUG
if (ldebug(clone)) {
printf(ARGS(clone, "flags %x, stack %x"),
(unsigned int)args->flags, (unsigned int)args->stack);
if (args->flags & CLONE_PID)
printf(LMSG("CLONE_PID not yet supported"));
printf(ARGS(clone, "flags %x, stack %x, parent tid: %x, child tid: %x"),
(unsigned int)args->flags, (unsigned int)args->stack,
(unsigned int)args->parent_tidptr, (unsigned int)args->child_tidptr);
}
#endif
if (!args->stack)
return (EINVAL);
exit_signal = args->flags & 0x000000ff;
if (exit_signal >= LINUX_NSIG)
return (EINVAL);
@ -371,12 +387,118 @@ linux_clone(struct thread *td, struct linux_clone_args *args)
if (error)
return (error);
/* create the emuldata */
error = linux_proc_init(td, p2->p_pid, args->flags);
/* reference it - no need to check this */
em = em_find(p2, EMUL_UNLOCKED);
KASSERT(em != NULL, ("clone: emuldata not found.\n"));
/* and adjust it */
if (args->flags & CLONE_PARENT_SETTID) {
if (args->parent_tidptr == NULL) {
EMUL_UNLOCK(&emul_lock);
return (EINVAL);
}
error = copyout(&p2->p_pid, args->parent_tidptr, sizeof(p2->p_pid));
if (error) {
EMUL_UNLOCK(&emul_lock);
return (error);
}
}
if (args->flags & CLONE_PARENT) {
#ifdef DEBUG
printf("linux_clone: CLONE_PARENT\n");
#endif
}
if (args->flags & CLONE_THREAD) {
/* XXX: linux mangles pgrp and pptr somehow
* I think it might be this but I am not sure.
*/
#ifdef notyet
p2->p_pgrp = td->td_proc->p_pgrp;
p2->p_pptr = td->td_proc->p_pptr;
#endif
exit_signal = 0;
#ifdef DEBUG
printf("linux_clone: CLONE_THREADS\n");
#endif
}
if (args->flags & CLONE_CHILD_SETTID)
em->child_set_tid = args->child_tidptr;
else
em->child_set_tid = NULL;
if (args->flags & CLONE_CHILD_CLEARTID)
em->child_clear_tid = args->child_tidptr;
else
em->child_clear_tid = NULL;
EMUL_UNLOCK(&emul_lock);
PROC_LOCK(p2);
p2->p_sigparent = exit_signal;
PROC_UNLOCK(p2);
td2 = FIRST_THREAD_IN_PROC(p2);
td2->td_frame->tf_esp = (unsigned int)args->stack;
/* in a case of stack = NULL we are supposed to COW calling process stack
* this is what normal fork() does so we just keep the tf_esp arg intact
*/
if (args->stack)
td2->td_frame->tf_esp = (unsigned int)args->stack;
if (args->flags & CLONE_SETTLS) {
struct l_user_desc info;
int idx;
int a[2];
struct segment_descriptor sd;
error = copyin((void *)td->td_frame->tf_esi, &info, sizeof(struct l_user_desc));
if (error)
return (error);
idx = info.entry_number;
/* looks like we're getting the idx we returned
* in the set_thread_area() syscall
*/
if (idx != 6 && idx != 3)
return (EINVAL);
/* this doesnt happen in practice */
if (idx == 6) {
/* we might copy out the entry_number as 3 */
info.entry_number = 3;
error = copyout(&info, (void *) td->td_frame->tf_esi, sizeof(struct l_user_desc));
if (error)
return (error);
}
a[0] = LDT_entry_a(&info);
a[1] = LDT_entry_b(&info);
memcpy(&sd, &a, sizeof(a));
#ifdef DEBUG
if (ldebug(clone))
printf("Segment created in clone with CLONE_SETTLS: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
sd.sd_hibase,
sd.sd_lolimit,
sd.sd_hilimit,
sd.sd_type,
sd.sd_dpl,
sd.sd_p,
sd.sd_xx,
sd.sd_def32,
sd.sd_gran);
#endif
/* this is taken from i386 version of cpu_set_user_tls() */
critical_enter();
/* set %gs */
td2->td_pcb->pcb_gsd = sd;
PCPU_GET(fsgs_gdt)[1] = sd;
load_gs(GSEL(GUGS_SEL, SEL_UPL));
critical_exit();
}
#ifdef DEBUG
if (ldebug(clone))
@ -847,25 +969,234 @@ linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args)
int
linux_set_thread_area(struct thread *td, struct linux_set_thread_area_args *args)
{
/*
* Return an error code instead of raising a SIGSYS so that
* the caller will fall back to simpler LDT methods.
struct l_user_desc info;
int error;
int idx;
int a[2];
struct segment_descriptor sd;
error = copyin(args->desc, &info, sizeof(struct l_user_desc));
if (error)
return (error);
#ifdef DEBUG
if (ldebug(set_thread_area))
printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, %i, %i, %i\n"),
info.entry_number,
info.base_addr,
info.limit,
info.seg_32bit,
info.contents,
info.read_exec_only,
info.limit_in_pages,
info.seg_not_present,
info.useable);
#endif
idx = info.entry_number;
/* Semantics of linux version: every thread in the system has array
* of 3 tls descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. This
* syscall loads one of the selected tls decriptors with a value
* and also loads GDT descriptors 6, 7 and 8 with the content of the per-thread
* descriptors.
*
* Semantics of fbsd version: I think we can ignore that linux has 3 per-thread
* descriptors and use just the 1st one. The tls_array[] is used only in
* set/get-thread_area() syscalls and for loading the GDT descriptors. In fbsd
* we use just one GDT descriptor for TLS so we will load just one.
* XXX: this doesnt work when user-space process tries to use more then 1 TLS segment
* comment in the linux sources says wine might do that.
*/
return (ENOSYS);
}
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
/* we support just GLIBC TLS now
* we should let 3 proceed as well because we use this segment so
* if code does two subsequent calls it should succeed
*/
if (idx != 6 && idx != -1 && idx != 3)
return (EINVAL);
td->td_retval[0] = td->td_proc->p_pid;
/* we have to copy out the GDT entry we use
* FreeBSD uses GDT entry #3 for storing %gs so load that
* XXX: what if userspace program doesnt check this value and tries
* to use 6, 7 or 8?
*/
idx = info.entry_number = 3;
error = copyout(&info, args->desc, sizeof(struct l_user_desc));
if (error)
return (error);
if (LDT_empty(&info)) {
a[0] = 0;
a[1] = 0;
} else {
a[0] = LDT_entry_a(&info);
a[1] = LDT_entry_b(&info);
}
memcpy(&sd, &a, sizeof(a));
#ifdef DEBUG
if (ldebug(set_thread_area))
printf("Segment created in set_thread_area: lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, type: %i, dpl: %i, p: %i, xx: %i, def32: %i, gran: %i\n", sd.sd_lobase,
sd.sd_hibase,
sd.sd_lolimit,
sd.sd_hilimit,
sd.sd_type,
sd.sd_dpl,
sd.sd_p,
sd.sd_xx,
sd.sd_def32,
sd.sd_gran);
#endif
/* this is taken from i386 version of cpu_set_user_tls() */
critical_enter();
/* set %gs */
td->td_pcb->pcb_gsd = sd;
PCPU_GET(fsgs_gdt)[1] = sd;
load_gs(GSEL(GUGS_SEL, SEL_UPL));
critical_exit();
return (0);
}
int
linux_tkill(struct thread *td, struct linux_tkill_args *args)
linux_get_thread_area(struct thread *td, struct linux_get_thread_area_args *args)
{
struct l_user_desc info;
int error;
int idx;
struct l_desc_struct desc;
struct segment_descriptor sd;
return (linux_kill(td, (struct linux_kill_args *) args));
#ifdef DEBUG
if (ldebug(get_thread_area))
printf(ARGS(get_thread_area, "%p"), args->desc);
#endif
error = copyin(args->desc, &info, sizeof(struct l_user_desc));
if (error)
return (error);
idx = info.entry_number;
/* XXX: I am not sure if we want 3 to be allowed too. */
if (idx != 6 && idx != 3)
return (EINVAL);
idx = 3;
memset(&info, 0, sizeof(info));
sd = PCPU_GET(fsgs_gdt)[1];
memcpy(&desc, &sd, sizeof(desc));
info.entry_number = idx;
info.base_addr = GET_BASE(&desc);
info.limit = GET_LIMIT(&desc);
info.seg_32bit = GET_32BIT(&desc);
info.contents = GET_CONTENTS(&desc);
info.read_exec_only = !GET_WRITABLE(&desc);
info.limit_in_pages = GET_LIMIT_PAGES(&desc);
info.seg_not_present = !GET_PRESENT(&desc);
info.useable = GET_USEABLE(&desc);
error = copyout(&info, args->desc, sizeof(struct l_user_desc));
if (error)
return (EFAULT);
return (0);
}
/* copied from kern/kern_time.c */
/*
 * Linux timer_create(): passed straight to the native ktimer_create().
 * The Linux argument structure is reinterpreted as the native one, so both
 * must share the same layout.
 */
int
linux_timer_create(struct thread *td, struct linux_timer_create_args *args)
{
	struct ktimer_create_args *bsd_args;

	bsd_args = (struct ktimer_create_args *)args;
	return (ktimer_create(td, bsd_args));
}
/*
 * Linux timer_settime(): passed straight to the native ktimer_settime().
 * The Linux argument structure is reinterpreted as the native one, so both
 * must share the same layout.
 */
int
linux_timer_settime(struct thread *td, struct linux_timer_settime_args *args)
{
	struct ktimer_settime_args *bsd_args;

	bsd_args = (struct ktimer_settime_args *)args;
	return (ktimer_settime(td, bsd_args));
}
/*
 * Linux timer_gettime(): passed straight to the native ktimer_gettime().
 * The Linux argument structure is reinterpreted as the native one, so both
 * must share the same layout.
 */
int
linux_timer_gettime(struct thread *td, struct linux_timer_gettime_args *args)
{
	struct ktimer_gettime_args *bsd_args;

	bsd_args = (struct ktimer_gettime_args *)args;
	return (ktimer_gettime(td, bsd_args));
}
/*
 * Linux timer_getoverrun(): passed straight to the native
 * ktimer_getoverrun().  The Linux argument structure is reinterpreted as the
 * native one, so both must share the same layout.
 */
int
linux_timer_getoverrun(struct thread *td, struct linux_timer_getoverrun_args *args)
{
	struct ktimer_getoverrun_args *bsd_args;

	bsd_args = (struct ktimer_getoverrun_args *)args;
	return (ktimer_getoverrun(td, bsd_args));
}
/*
 * Linux timer_delete(): passed straight to the native ktimer_delete().
 * The Linux argument structure is reinterpreted as the native one, so both
 * must share the same layout.
 */
int
linux_timer_delete(struct thread *td, struct linux_timer_delete_args *args)
{
	struct ktimer_delete_args *bsd_args;

	bsd_args = (struct ktimer_delete_args *)args;
	return (ktimer_delete(td, bsd_args));
}
/* XXX: this wont work with module - convert it */
/*
 * Linux mq_open(): forwarded to the native kmq_open() when the kernel is
 * built with P1003_1B_MQUEUE; otherwise the call fails with ENOSYS.
 * The Linux argument structure is reinterpreted as the native one.
 */
int
linux_mq_open(struct thread *td, struct linux_mq_open_args *args)
{
#ifdef P1003_1B_MQUEUE
	struct kmq_open_args *bsd_args = (struct kmq_open_args *)args;

	return (kmq_open(td, bsd_args));
#else
	return (ENOSYS);
#endif
}
/*
 * Linux mq_unlink(): forwarded to the native kmq_unlink() when the kernel is
 * built with P1003_1B_MQUEUE; otherwise the call fails with ENOSYS.
 * The Linux argument structure is reinterpreted as the native one.
 */
int
linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args)
{
#ifdef P1003_1B_MQUEUE
	struct kmq_unlink_args *bsd_args = (struct kmq_unlink_args *)args;

	return (kmq_unlink(td, bsd_args));
#else
	return (ENOSYS);
#endif
}
/*
 * Linux mq_timedsend(): forwarded to the native kmq_timedsend() when the
 * kernel is built with P1003_1B_MQUEUE; otherwise the call fails with ENOSYS.
 * The Linux argument structure is reinterpreted as the native one.
 */
int
linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args)
{
#ifdef P1003_1B_MQUEUE
	struct kmq_timedsend_args *bsd_args = (struct kmq_timedsend_args *)args;

	return (kmq_timedsend(td, bsd_args));
#else
	return (ENOSYS);
#endif
}
/*
 * Linux mq_timedreceive(): forwarded to the native kmq_timedreceive() when
 * the kernel is built with P1003_1B_MQUEUE; otherwise the call fails with
 * ENOSYS.  The Linux argument structure is reinterpreted as the native one.
 */
int
linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args)
{
#ifdef P1003_1B_MQUEUE
	struct kmq_timedreceive_args *bsd_args =
	    (struct kmq_timedreceive_args *)args;

	return (kmq_timedreceive(td, bsd_args));
#else
	return (ENOSYS);
#endif
}
/*
 * Linux mq_notify(): forwarded to the native kmq_notify() when the kernel is
 * built with P1003_1B_MQUEUE; otherwise the call fails with ENOSYS.
 * The Linux argument structure is reinterpreted as the native one.
 */
int
linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args)
{
#ifdef P1003_1B_MQUEUE
	struct kmq_notify_args *bsd_args = (struct kmq_notify_args *)args;

	return (kmq_notify(td, bsd_args));
#else
	return (ENOSYS);
#endif
}
/*
 * Linux mq_getsetattr(): forwarded to the native kmq_setattr() when the
 * kernel is built with P1003_1B_MQUEUE; otherwise the call fails with ENOSYS.
 * The Linux argument structure is reinterpreted as the native one.
 */
int
linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args)
{
#ifdef P1003_1B_MQUEUE
	struct kmq_setattr_args *bsd_args = (struct kmq_setattr_args *)args;

	return (kmq_setattr(td, bsd_args));
#else
	return (ENOSYS);
#endif
}

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>
#include <sys/eventhandler.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@ -105,6 +106,18 @@ static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void exec_linux_setregs(struct thread *td, u_long entry,
u_long stack, u_long ps_strings);
extern void linux_proc_exit(void *, struct proc *, struct image_params *);
extern void linux_proc_exec(void *, struct proc *, struct image_params *);
extern void linux_schedtail(void *, struct proc *);
extern LIST_HEAD(futex_list, futex) futex_list;
extern struct sx emul_shared_lock;
extern struct sx emul_lock;
extern struct mtx futex_mtx;
static eventhandler_tag linux_exit_tag;
static eventhandler_tag linux_schedtail_tag;
static eventhandler_tag linux_exec_tag;
/*
* Linux syscalls return negative errno's, we do positive and map them
* Reference:
@ -804,7 +817,7 @@ exec_linux_setregs(struct thread *td, u_long entry,
struct sysentvec linux_sysvec = {
LINUX_SYS_MAXSYSCALL,
linux_sysent,
0xff,
0,
LINUX_SIGTBLSZ,
bsd_to_linux_signal,
ELAST + 1,
@ -833,7 +846,7 @@ struct sysentvec linux_sysvec = {
struct sysentvec elf_linux_sysvec = {
LINUX_SYS_MAXSYSCALL,
linux_sysent,
0xff,
0,
LINUX_SIGTBLSZ,
bsd_to_linux_signal,
ELAST + 1,
@ -908,6 +921,16 @@ linux_elf_modevent(module_t mod, int type, void *data)
linux_ioctl_register_handler(*lihp);
SET_FOREACH(ldhp, linux_device_handler_set)
linux_device_register_handler(*ldhp);
sx_init(&emul_lock, "emuldata lock");
sx_init(&emul_shared_lock, "emuldata->shared lock");
LIST_INIT(&futex_list);
mtx_init(&futex_mtx, "futex protection lock", NULL, MTX_DEF);
linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit,
NULL, 1000);
linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail,
NULL, 1000);
linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec,
NULL, 1000);
if (bootverbose)
printf("Linux ELF exec handler installed\n");
} else
@ -929,6 +952,12 @@ linux_elf_modevent(module_t mod, int type, void *data)
linux_ioctl_unregister_handler(*lihp);
SET_FOREACH(ldhp, linux_device_handler_set)
linux_device_unregister_handler(*ldhp);
sx_destroy(&emul_lock);
sx_destroy(&emul_shared_lock);
mtx_destroy(&futex_mtx);
EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag);
EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag);
EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag);
if (bootverbose)
printf("Linux ELF exec handler removed\n");
} else