freebsd-skq/sys/i386/linux/linux_misc.c
peter 3cb20067de This is an extract of changes from what I am currently running...
- Optimise the linux a.out loading and uselib system calls so they
  take advantage of some of John's recent interface improvements.
  Basically, this means they make far less map changes than before.
- Attempt to plug some potentially nasty kernel_map memory leaks..
- Improve support for QMAGIC libs (I only use QMAGIC (ie: a.out libraries from
  the slackware 3.0 dist) but this depends on other changes to enhance
  the /compat/linux support)
- uselib goes out through a single exit as part of the resource tracking
  that I did when closing the resource leaks on errors.  This could be
  cleaner than what I did, but making a 30-deep nested if/else was not my
  idea of fun, neither did I want to repeat the same code 30 times over for
  each failure possibility.  I guess this function needs to be split into
  smaller functions to solve this.

I've been running the Linux Netscape-2.0 (with Java) to test this, and apart
from the long-standing problem with the missing scrollbars, it appears to
still work as before with ZMAGIC libs (and the leaks)..  However, I've
been using it with mods for the signal trampoline code for native linux stack
frames on signals and exterminated the blasted sigreturn printf() problem,
so I can't be certain that there is not a dependency on something else.
1996-02-16 18:40:50 +00:00

782 lines
18 KiB
C

/*-
* Copyright (c) 1994-1995 Søren Schmidt
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software withough specific prior written permission
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: linux_misc.c,v 1.11 1996/01/19 22:59:24 dyson Exp $
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/kernel.h>
#include <sys/exec.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/dirent.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/ioctl.h>
#include <sys/imgact_aout.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/times.h>
#include <sys/utsname.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/lock.h>
#include <vm/vm_kern.h>
#include <vm/vm_prot.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <machine/cpu.h>
#include <machine/psl.h>
#include <i386/linux/linux.h>
#include <i386/linux/sysproto.h>
/* Argument block for the Linux alarm() system call. */
struct linux_alarm_args {
    unsigned int secs;
};

/*
 * Linux alarm(): replace the process's real interval timer with a one-shot
 * timer of args->secs seconds (a timeout is only scheduled when the new
 * value is non-zero).  *retval receives the time that was left on the
 * previous timer, in whole seconds, rounded up.
 *
 * Returns 0 on success or EINVAL when itimerfix() rejects the new value.
 */
int
linux_alarm(struct proc *p, struct linux_alarm_args *args, int *retval)
{
    struct itimerval it, old_it;
    struct timeval tv;
    int s;

#ifdef DEBUG
    printf("Linux-emul(%d): alarm(%d)\n", p->p_pid, args->secs);
#endif

    /* One-shot timer: no reload interval. */
    it.it_interval.tv_sec = 0;
    it.it_interval.tv_usec = 0;
    it.it_value.tv_sec = (long)args->secs;
    it.it_value.tv_usec = 0;

    /* Snapshot the old timer and turn its expiry into "time remaining". */
    s = splclock();
    old_it = p->p_realtimer;
    tv = time;
    if (timerisset(&old_it.it_value)) {
        if (timercmp(&old_it.it_value, &tv, <)) {
            /* Already in the past: nothing remains. */
            timerclear(&old_it.it_value);
        } else {
            timevalsub(&old_it.it_value, &tv);
        }
    }
    splx(s);

    if (itimerfix(&it.it_value) || itimerfix(&it.it_interval))
        return EINVAL;

    /* Cancel any pending expiry and install the new timer. */
    s = splclock();
    untimeout(realitexpire, (caddr_t)p);
    tv = time;
    if (timerisset(&it.it_value)) {
        timevaladd(&it.it_value, &tv);
        timeout(realitexpire, (caddr_t)p, hzto(&it.it_value));
    }
    p->p_realtimer = it;
    splx(s);

    /* Any fractional second left over counts as a full second. */
    *retval = old_it.it_value.tv_sec + (old_it.it_value.tv_usec ? 1 : 0);
    return 0;
}
/* Argument block for the Linux brk() system call. */
struct linux_brk_args {
    linux_caddr_t dsend;
};

/*
 * Linux brk(): try to move the end of the data segment to args->dsend by
 * delegating to obreak().  The call itself never fails: retval[0] is the
 * requested break when it was accepted, otherwise the current break.
 */
int
linux_brk(struct proc *p, struct linux_brk_args *args, int *retval)
{
#if 0
    /* Disabled variant that grows the data segment with vm_map_find(). */
    struct vmspace *vm = p->p_vmspace;
    vm_offset_t new, old;
    int error;
    if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr)
        return EINVAL;
    if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr)
        > p->p_rlimit[RLIMIT_DATA].rlim_cur)
        return ENOMEM;
    old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize);
    new = round_page((vm_offset_t)args->dsend);
    *retval = old;
    if ((new-old) > 0) {
        if (swap_pager_full)
            return ENOMEM;
        error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE,
                            VM_PROT_ALL, VM_PROT_ALL, 0);
        if (error)
            return error;
        vm->vm_dsize += btoc((new-old));
        *retval = (int)(vm->vm_daddr + ctob(vm->vm_dsize));
    }
    return 0;
#else
    struct vmspace *vm = p->p_vmspace;
    vm_offset_t new, old;
    struct obreak_args tmp;     /* carries the single field: char *nsize */

#ifdef DEBUG
    printf("Linux-emul(%d): brk(%08x)\n", p->p_pid, args->dsend);
#endif

    /* Current break = start of data + current data size. */
    old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
    new = (vm_offset_t)args->dsend;
    tmp.nsize = (char *) new;

    /*
     * obreak() is only attempted for addresses above the start of the data
     * segment; a rejected or failed request reports the old break.
     */
    if ((caddr_t)new <= vm->vm_daddr || obreak(p, &tmp, retval) != 0) {
        retval[0] = (int)old;
    } else {
        retval[0] = (int)new;
    }
    return 0;
#endif
}
struct linux_uselib_args {
char *library;
};
int
linux_uselib(struct proc *p, struct linux_uselib_args *args, int *retval)
{
struct nameidata ni;
struct vnode *vp;
struct exec *a_out;
struct vattr attr;
unsigned long vmaddr, file_offset;
unsigned long buffer, bss_size;
char *ptr;
char path[MAXPATHLEN];
const char *prefix = "/compat/linux";
size_t sz, len;
int error;
int locked;
#ifdef DEBUG
printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library);
#endif
a_out = NULL;
locked = 0;
vp = NULL;
for (ptr = path; (*ptr = *prefix) != '\0'; ptr++, prefix++) ;
sz = MAXPATHLEN - (ptr - path);
if (error = copyinstr(args->library, ptr, sz, &len))
goto cleanup;
if (*ptr != '/') {
error = EINVAL;
goto cleanup;
}
#ifdef DEBUG
printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, path);
#endif
NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
if (error = namei(&ni))
goto cleanup;
vp = ni.ni_vp;
if (vp == NULL) {
error = ENOEXEC; /* ?? */
goto cleanup;
}
/*
* From here on down, we have a locked vnode that must be unlocked.
*/
locked++;
/*
* Writable?
*/
if (vp->v_writecount) {
error = ETXTBSY;
goto cleanup;
}
/*
* Executable?
*/
if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p))
goto cleanup;
if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
((attr.va_mode & 0111) == 0) ||
(attr.va_type != VREG)) {
error = ENOEXEC;
goto cleanup;
}
/*
* Sensible size?
*/
if (attr.va_size == 0) {
error = ENOEXEC;
goto cleanup;
}
/*
* Can we access it?
*/
if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p))
goto cleanup;
if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p))
goto cleanup;
/*
* Lock no longer needed
*/
VOP_UNLOCK(vp);
locked = 0;
/*
* Pull in executable header into kernel_map
*/
error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE,
VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0);
if (error)
goto cleanup;
/*
* Is it a Linux binary ?
*/
if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
error = ENOEXEC;
goto cleanup;
}
/* While we are here, we should REALLY do some more checks */
/*
* Set file/virtual offset based on a.out variant.
*/
switch ((int)(a_out->a_magic & 0xffff)) {
case 0413: /* ZMAGIC */
file_offset = 1024;
break;
case 0314: /* QMAGIC */
file_offset = 0;
break;
default:
error = ENOEXEC;
goto cleanup;
}
bss_size = round_page(a_out->a_bss);
/*
* Check various fields in header for validity/bounds.
*/
if (a_out->a_text % NBPG || a_out->a_data % NBPG) {
error = ENOEXEC;
goto cleanup;
}
/* text + data can't exceed file size */
if (a_out->a_data + a_out->a_text > attr.va_size) {
error = EFAULT;
goto cleanup;
}
/*
* text/data/bss must not exceed limits
* XXX: this is not complete. it should check current usage PLUS
* the resources needed by this library.
*/
if (a_out->a_text > MAXTSIZ || a_out->a_data + bss_size > MAXDSIZ ||
a_out->a_data+bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) {
error = ENOMEM;
goto cleanup;
}
/*
* prevent more writers
*/
vp->v_flag |= VTEXT;
/*
* Check if file_offset page aligned,.
* Currently we cannot handle misalinged file offsets,
* and so we read in the entire image (what a waste).
*/
if (file_offset & PGOFSET) {
#ifdef DEBUG
printf("uselib: Non page aligned binary %d\n", file_offset);
#endif
/*
* Map text+data read/write/execute
*/
/* a_entry is the load address and is page aligned */
vmaddr = trunc_page(a_out->a_entry);
/* get anon user mapping, read+write+execute */
error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
a_out->a_text + a_out->a_data, FALSE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error)
goto cleanup;
/* map file into kernel_map */
error = vm_mmap(kernel_map, &buffer,
round_page(a_out->a_text + a_out->a_data + file_offset),
VM_PROT_READ, VM_PROT_READ, MAP_FILE,
(caddr_t)vp, trunc_page(file_offset));
if (error)
goto cleanup;
/* copy from kernel VM space to user space */
error = copyout((caddr_t)(buffer + file_offset), (caddr_t)vmaddr,
a_out->a_text + a_out->a_data);
/* release temporary kernel space */
vm_map_remove(kernel_map, buffer,
round_page(a_out->a_text + a_out->a_data + file_offset));
if (error)
goto cleanup;
}
else {
#ifdef DEBUG
printf("uselib: Page aligned binary %d\n", file_offset);
#endif
/*
* for QMAGIC, a_entry is 20 bytes beyond the load address
* to skip the executable header
*/
vmaddr = trunc_page(a_out->a_entry);
/*
* Map it all into the process's space as a single copy-on-write
* "data" segment.
*/
error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr,
a_out->a_text + a_out->a_data,
VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED,
(caddr_t)vp, file_offset);
if (error)
goto cleanup;
}
#ifdef DEBUG
printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]);
#endif
if (bss_size != 0) {
/*
* Calculate BSS start address
*/
vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data;
/*
* allocate some 'anon' space
*/
error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr,
bss_size, FALSE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error)
goto cleanup;
}
cleanup:
/*
* Unlock vnode if needed
*/
if (locked)
VOP_UNLOCK(vp);
/*
* Release the kernel mapping.
*/
if (a_out)
vm_map_remove(kernel_map, (vm_offset_t)a_out, PAGE_SIZE);
return error;
}
struct linux_select_args {
void *ptr;
};
int
linux_select(struct proc *p, struct linux_select_args *args, int *retval)
{
struct {
int nfds;
fd_set *readfds;
fd_set *writefds;
fd_set *exceptfds;
struct timeval *timeout;
} linux_args;
struct select_args /* {
unsigned int nd;
fd_set *in;
fd_set *ou;
fd_set *ex;
struct timeval *tv;
} */ bsd_args;
int error;
if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
sizeof(linux_args))))
return error;
#ifdef DEBUG
printf("Linux-emul(%d): select(%d, %d, %d, %d, %d)\n",
p->p_pid, linux_args.nfds, linux_args.readfds,
linux_args.writefds, linux_args.exceptfds,
linux_args.timeout);
#endif
bsd_args.nd = linux_args.nfds;
bsd_args.in = linux_args.readfds;
bsd_args.ou = linux_args.writefds;
bsd_args.ex = linux_args.exceptfds;
bsd_args.tv = linux_args.timeout;
return select(p, &bsd_args, retval);
}
struct linux_getpgid_args {
int pid;
};
int
linux_getpgid(struct proc *p, struct linux_getpgid_args *args, int *retval)
{
struct proc *curproc;
#ifdef DEBUG
printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid);
#endif
if (args->pid != p->p_pid) {
if (!(curproc = pfind(args->pid)))
return ESRCH;
}
else
curproc = p;
*retval = curproc->p_pgid;
return 0;
}
/*
 * Linux fork(): run the native fork() and adjust its result.  When fork()
 * succeeds and leaves retval[1] == 1, retval[0] is forced to 0.
 * NOTE(review): presumably retval[1] == 1 marks the child side of the
 * fork -- confirm against fork().
 *
 * Returns the error from fork(), or 0.
 */
int
linux_fork(struct proc *p, void *args, int *retval)
{
    int error;

#ifdef DEBUG
    printf("Linux-emul(%d): fork()\n", p->p_pid);
#endif

    error = fork(p, args, retval);
    if (error == 0 && retval[1] == 1)
        retval[0] = 0;
    return error;
}
struct linux_mmap_args {
void *ptr;
};
int
linux_mmap(struct proc *p, struct linux_mmap_args *args, int *retval)
{
struct {
linux_caddr_t addr;
int len;
int prot;
int flags;
int fd;
int pos;
} linux_args;
struct mmap_args /* {
caddr_t addr;
size_t len;
int prot;
int flags;
int fd;
long pad;
off_t pos;
} */ bsd_args;
int error;
if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args,
sizeof(linux_args))))
return error;
#ifdef DEBUG
printf("Linux-emul(%d): mmap(%08x, %d, %d, %08x, %d, %d)\n",
p->p_pid, linux_args.addr, linux_args.len, linux_args.prot,
linux_args.flags, linux_args.fd, linux_args.pos);
#endif
bsd_args.flags = 0;
if (linux_args.flags & LINUX_MAP_SHARED)
bsd_args.flags |= MAP_SHARED;
if (linux_args.flags & LINUX_MAP_PRIVATE)
bsd_args.flags |= MAP_PRIVATE;
if (linux_args.flags & LINUX_MAP_FIXED)
bsd_args.flags |= MAP_FIXED;
if (linux_args.flags & LINUX_MAP_ANON)
bsd_args.flags |= MAP_ANON;
bsd_args.addr = linux_args.addr;
bsd_args.len = linux_args.len;
bsd_args.prot = linux_args.prot;
bsd_args.fd = linux_args.fd;
bsd_args.pos = linux_args.pos;
bsd_args.pad = 0;
return mmap(p, &bsd_args, retval);
}
/* Argument block for the Linux pipe() system call. */
struct linux_pipe_args {
    int *pipefds;
};

/*
 * Linux pipe(): the native pipe() leaves its two descriptors in retval[0]
 * and retval[1]; the Linux caller expects them written to the user array
 * args->pipefds and a plain 0 as the call's result.
 *
 * Returns the error from pipe() or from the copyout(), or 0.
 */
int
linux_pipe(struct proc *p, struct linux_pipe_args *args, int *retval)
{
    int error;

#ifdef DEBUG
    printf("Linux-emul(%d): pipe(*)\n", p->p_pid);
#endif

    error = pipe(p, 0, retval);
    if (error == 0)
        error = copyout(retval, args->pipefds, 2*sizeof(int));
    if (error == 0)
        *retval = 0;
    return error;
}
struct linux_time_args {
linux_time_t *tm;
};
int
linux_time(struct proc *p, struct linux_time_args *args, int *retval)
{
struct timeval tv;
linux_time_t tm;
int error;
#ifdef DEBUG
printf("Linux-emul(%d): time(*)\n", p->p_pid);
#endif
microtime(&tv);
tm = tv.tv_sec;
if (error = copyout(&tm, args->tm, sizeof(linux_time_t)))
return error;
*retval = tv.tv_sec;
return 0;
}
/* Result structure written to user space by linux_times(). */
struct linux_tms {
    long tms_utime;
    long tms_stime;
    long tms_cutime;
    long tms_cstime;
};

/* Argument block for the Linux times() system call. */
struct linux_tms_args {
    char *buf;
};

/*
 * Linux times(): fill a linux_tms structure and copy it out to args->buf.
 *
 * The process's own user/system times are taken from its p_uticks and
 * p_sticks counters; the children's times come from p_stats->p_cru and are
 * converted from seconds/microseconds to ticks using hz.  *retval is set to
 * the current time from microtime(), converted to ticks the same way.
 *
 * Returns the result of the copyout().
 */
int
linux_times(struct proc *p, struct linux_tms_args *args, int *retval)
{
    struct timeval tv;
    struct linux_tms tms;

#ifdef DEBUG
    printf("Linux-emul(%d): times(*)\n", p->p_pid);
#endif

    /* Times of this process. */
    tms.tms_utime = p->p_uticks;
    tms.tms_stime = p->p_sticks;

    /* Accumulated times of the children: timeval -> ticks. */
    tms.tms_cutime =
        p->p_stats->p_cru.ru_utime.tv_sec * hz +
        ((p->p_stats->p_cru.ru_utime.tv_usec * hz)/1000000);
    tms.tms_cstime =
        p->p_stats->p_cru.ru_stime.tv_sec * hz +
        ((p->p_stats->p_cru.ru_stime.tv_usec * hz)/1000000);

    /* The call's own result is "now", expressed in ticks. */
    microtime(&tv);
    *retval = tv.tv_sec * hz + (tv.tv_usec * hz)/1000000;

    return (copyout((caddr_t)&tms, (caddr_t)args->buf,
                    sizeof(struct linux_tms)));
}
/* Result structure written to user space by linux_newuname(). */
struct linux_newuname_t {
    char sysname[65];
    char nodename[65];
    char release[65];
    char version[65];
    char machine[65];
    char domainname[65];
};

/* Argument block for the Linux newuname() system call. */
struct linux_newuname_args {
    char *buf;
};

/*
 * Linux newuname(): build a linux_newuname_t from the kernel's ostype,
 * hostname, osrelease, version, machine and domainname strings and copy it
 * out to args->buf.
 *
 * The whole structure is zeroed first.  Each field is 65 bytes but at most
 * 64 are copied into it, so the final byte of every field stays 0: every
 * string is terminated and no uninitialised stack bytes reach user space.
 *
 * Returns the result of the copyout().
 */
int
linux_newuname(struct proc *p, struct linux_newuname_args *args, int *retval)
{
    struct linux_newuname_t uts;

#ifdef DEBUG
    printf("Linux-emul(%d): newuname(*)\n", p->p_pid);
#endif

    /* Size of the result structure, not of the argument block. */
    bzero(&uts, sizeof(uts));

    strncpy(uts.sysname, ostype, 64);
    strncpy(uts.nodename, hostname, 64);
    strncpy(uts.release, osrelease, 64);
    strncpy(uts.version, version, 64);
    strncpy(uts.machine, machine, 64);
    strncpy(uts.domainname, domainname, 64);

    return (copyout((caddr_t)&uts, (caddr_t)args->buf, sizeof(uts)));
}
struct linux_utime_args {
char *fname;
linux_time_t *timeptr;
};
int
linux_utime(struct proc *p, struct linux_utime_args *args, int *retval)
{
struct utimes_args /* {
char *path;
struct timeval *tptr;
} */ bsdutimes;
struct timeval tv;
#ifdef DEBUG
printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname);
#endif
tv.tv_sec = (long)args->timeptr;
tv.tv_usec = 0;
bsdutimes.tptr = &tv;
bsdutimes.path = args->fname;
return utimes(p, &bsdutimes, retval);
}
struct linux_waitpid_args {
int pid;
int *status;
int options;
};
int
linux_waitpid(struct proc *p, struct linux_waitpid_args *args, int *retval)
{
struct wait_args /* {
int pid;
int *status;
int options;
struct rusage *rusage;
} */ tmp;
int error, tmpstat;
#ifdef DEBUG
printf("Linux-emul(%d): waitpid(%d, *, %d)\n",
p->p_pid, args->pid, args->options);
#endif
tmp.pid = args->pid;
tmp.status = args->status;
tmp.options = args->options;
tmp.rusage = NULL;
if (error = wait4(p, &tmp, retval))
return error;
if (error = copyin(args->status, &tmpstat, sizeof(int)))
return error;
if (WIFSIGNALED(tmpstat))
tmpstat = (tmpstat & 0xffffff80) |
bsd_to_linux_signal[WTERMSIG(tmpstat)];
else if (WIFSTOPPED(tmpstat))
tmpstat = (tmpstat & 0xffff00ff) |
(bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
return copyout(&tmpstat, args->status, sizeof(int));
}
struct linux_wait4_args {
int pid;
int *status;
int options;
struct rusage *rusage;
};
int
linux_wait4(struct proc *p, struct linux_wait4_args *args, int *retval)
{
struct wait_args /* {
int pid;
int *status;
int options;
struct rusage *rusage;
} */ tmp;
int error, tmpstat;
#ifdef DEBUG
printf("Linux-emul(%d): wait4(%d, *, %d, *)\n",
p->p_pid, args->pid, args->options);
#endif
tmp.pid = args->pid;
tmp.status = args->status;
tmp.options = args->options;
tmp.rusage = args->rusage;
if (error = wait4(p, &tmp, retval))
return error;
if (error = copyin(args->status, &tmpstat, sizeof(int)))
return error;
if (WIFSIGNALED(tmpstat))
tmpstat = (tmpstat & 0xffffff80) |
bsd_to_linux_signal[WTERMSIG(tmpstat)];
else if (WIFSTOPPED(tmpstat))
tmpstat = (tmpstat & 0xffff00ff) |
(bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8);
return copyout(&tmpstat, args->status, sizeof(int));
}
/* Argument block for the Linux mknod() system call. */
struct linux_mknod_args {
    char *path;
    int mode;
    int dev;
};

/*
 * Linux mknod(): dispatch on the requested mode.  When the S_IFIFO bit is
 * set the request is handed to mkfifo(), otherwise to mknod().  In both
 * cases the Linux argument block is passed on as-is, cast to the native
 * argument type.
 *
 * Returns whatever the chosen native call returns.
 */
int
linux_mknod(struct proc *p, struct linux_mknod_args *args, int *retval)
{
    if (args->mode & S_IFIFO) {
        return mkfifo(p, (struct mkfifo_args *)args, retval);
    }
    return mknod(p, (struct mknod_args *)args, retval);
}