freebsd-skq/lib/libkvm/kvm_ia64.c
marcel 5d3af2c5ab Revamp of the syscall path, exception and context handling. The
prime objectives are:
o  Implement a syscall path based on the epc inststruction (see
   sys/ia64/ia64/syscall.s).
o  Revisit the places were we need to save and restore registers
   and define those contexts in terms of the register sets (see
   sys/ia64/include/_regset.h).

Secundairy objectives:
o  Remove the requirement to use contigmalloc for kernel stacks.
o  Better handling of the high FP registers for SMP systems.
o  Switch to the new cpu_switch() and cpu_throw() semantics.
o  Add a good unwinder to reconstruct contexts for the rare
   cases we need to (see sys/contrib/ia64/libuwx)

Many files are affected by this change. Functionally it boils
down to:
o  The EPC syscall doesn't preserve registers it does not need
   to preserve and places the arguments differently on the stack.
   This affects libc and truss.
o  The address of the kernel page directory (kptdir) had to
   be unstaticized for use by the nested TLB fault handler.
   The name has been changed to ia64_kptdir to avoid conflicts.
   The renaming affects libkvm.
o  The trapframe only contains the special registers and the
   scratch registers. For syscalls using the EPC syscall path
   no scratch registers are saved. This affects all places where
   the trapframe is accessed. Most notably the unaligned access
   handler, the signal delivery code and the debugger.
o  Context switching only partly saves the special registers
   and the preserved registers. This affects cpu_switch() and
   triggered the move to the new semantics, which additionally
   affects cpu_throw().
o  The high FP registers are either in the PCB or on some
   CPU. context switching for them is done lazily. This affects
   trap().
o  The mcontext has room for all registers, but not all of them
   have to be defined in all cases. This mostly affects signal
   delivery code now. The *context syscalls are as of yet still
   unimplemented.

Many details went into the removal of the requirement to use
contigmalloc for kernel stacks. The details are mostly CPU
specific and limited to exception_save() and exception_restore().
The few places where we create, destroy or switch stacks were
mostly simplified by not having to construct physical addresses
and additionally saving the virtual addresses for later use.

Besides more efficient context saving and restoring, which of
course yields a noticable speedup, this also fixes the dreaded
SMP bootup problem as a side-effect. The details of which are
still not fully understood.

This change includes all the necessary backward compatibility
code to have it handle older userland binaries that use the
break instruction for syscalls. Support for break-based syscalls
has been pessimized in favor of a clean implementation. Due to
the overall better performance of the kernel, this will still
be notived as an improvement if it's noticed at all.

Approved by: re@ (jhb)
2003-05-16 21:26:42 +00:00

210 lines
5.0 KiB
C

/* $FreeBSD$ */
/* $NetBSD: kvm_alpha.c,v 1.7.2.1 1997/11/02 20:34:26 mellon Exp $ */
/*
* Copyright (c) 1994, 1995 Carnegie-Mellon University.
* All rights reserved.
*
* Author: Chris G. Demetriou
*
* Permission to use, copy, modify and distribute this software and
* its documentation is hereby granted, provided that both the copyright
* notice and this permission notice appear in all copies of the
* software, derivative works or modified versions, and any portions
* thereof, and that both notices appear in supporting documentation.
*
* CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
* CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
* FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
*
* Carnegie Mellon requests users of this software to return to
*
* Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
* School of Computer Science
* Carnegie Mellon University
* Pittsburgh PA 15213-3890
*
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*/
#include <sys/types.h>
#include <sys/elf64.h>
#include <sys/mman.h>
#include <machine/pte.h>
#include <kvm.h>
#include <limits.h>
#include <stdlib.h>
#include <unistd.h>
#include "kvm_private.h"
#define REGION_BASE(n) (((uint64_t)(n)) << 61)
#define REGION_ADDR(x) ((x) & ((1LL<<61)-1LL))
#define NKPTEPG(ps) ((ps) / sizeof(struct ia64_lpte))
#define KPTE_PTE_INDEX(va,ps) (((va)/(ps)) % NKPTEPG(ps))
#define KPTE_DIR_INDEX(va,ps) (((va)/(ps)) / NKPTEPG(ps))
struct vmstate {
void *mmapbase;
size_t mmapsize;
size_t pagesize;
u_long kptdir;
};
/*
* Map the ELF headers into the process' address space. We do this in two
* steps: first the ELF header itself and using that information the whole
* set of headers.
*/
static int
_kvm_maphdrs(kvm_t *kd, size_t sz)
{
struct vmstate *vm = kd->vmst;
/* munmap() previous mmap(). */
if (vm->mmapbase != NULL) {
munmap(vm->mmapbase, vm->mmapsize);
vm->mmapbase = NULL;
}
vm->mmapsize = sz;
vm->mmapbase = mmap(NULL, sz, PROT_READ, MAP_PRIVATE, kd->pmfd, NULL);
if (vm->mmapbase == MAP_FAILED) {
_kvm_err(kd, kd->program, "cannot mmap corefile");
return (-1);
}
return (0);
}
/*
* Translate a physical memory address to a file-offset in the crash-dump.
*/
static size_t
_kvm_pa2off(kvm_t *kd, uint64_t pa, u_long *ofs, size_t pgsz)
{
Elf64_Ehdr *e = kd->vmst->mmapbase;
Elf64_Phdr *p = (Elf64_Phdr*)((char*)e + e->e_phoff);
int n = e->e_phnum;
if (pa != REGION_ADDR(pa)) {
_kvm_err(kd, kd->program, "internal error");
return (0);
}
while (n && (pa < p->p_paddr || pa >= p->p_paddr + p->p_memsz))
p++, n--;
if (n == 0)
return (0);
*ofs = (pa - p->p_paddr) + p->p_offset;
if (pgsz == 0)
return (p->p_memsz - (pa - p->p_paddr));
return (pgsz - ((size_t)pa & (pgsz - 1)));
}
void
_kvm_freevtop(kvm_t *kd)
{
struct vmstate *vm = kd->vmst;
if (vm->mmapbase != NULL)
munmap(vm->mmapbase, vm->mmapsize);
free(vm);
kd->vmst = NULL;
}
int
_kvm_initvtop(kvm_t *kd)
{
struct nlist nlist[2];
uint64_t va;
Elf64_Ehdr *ehdr;
size_t hdrsz;
kd->vmst = (struct vmstate *)_kvm_malloc(kd, sizeof(*kd->vmst));
if (kd->vmst == NULL) {
_kvm_err(kd, kd->program, "cannot allocate vm");
return (-1);
}
kd->vmst->pagesize = getpagesize();
if (_kvm_maphdrs(kd, sizeof(Elf64_Ehdr)) == -1)
return (-1);
ehdr = kd->vmst->mmapbase;
hdrsz = ehdr->e_phoff + ehdr->e_phentsize * ehdr->e_phnum;
if (_kvm_maphdrs(kd, hdrsz) == -1)
return (-1);
/*
* At this point we've got enough information to use kvm_read() for
* direct mapped (ie region 6 and region 7) address, such as symbol
* addresses/values.
*/
nlist[0].n_name = "ia64_kptdir";
nlist[1].n_name = 0;
if (kvm_nlist(kd, nlist) != 0) {
_kvm_err(kd, kd->program, "bad namelist");
return (-1);
}
if (kvm_read(kd, (nlist[0].n_value), &va, sizeof(va)) != sizeof(va)) {
_kvm_err(kd, kd->program, "cannot read kptdir");
return (-1);
}
if (va < REGION_BASE(6)) {
_kvm_err(kd, kd->program, "kptdir is itself virtual");
return (-1);
}
kd->vmst->kptdir = va;
return (0);
}
int
_kvm_kvatop(kvm_t *kd, u_long va, u_long *pa)
{
struct ia64_lpte pte;
uint64_t pgaddr, ptaddr;
size_t pgno, pgsz, ptno;
if (va >= REGION_BASE(6)) {
/* Regions 6 and 7: direct mapped. */
return (_kvm_pa2off(kd, REGION_ADDR(va), pa, 0));
} else if (va >= REGION_BASE(5)) {
/* Region 5: virtual. */
va = REGION_ADDR(va);
pgsz = kd->vmst->pagesize;
ptno = KPTE_DIR_INDEX(va, pgsz);
pgno = KPTE_PTE_INDEX(va, pgsz);
if (ptno >= (pgsz >> 3))
goto fail;
ptaddr = kd->vmst->kptdir + (ptno << 3);
if (kvm_read(kd, ptaddr, &pgaddr, 8) != 8)
goto fail;
if (pgaddr == 0)
goto fail;
pgaddr += (pgno * sizeof(pte));
if (kvm_read(kd, pgaddr, &pte, sizeof(pte)) != sizeof(pte))
goto fail;
if (!pte.pte_p)
goto fail;
va = ((u_long)pte.pte_ppn << 12) + (va & (pgsz - 1));
return (_kvm_pa2off(kd, va, pa, pgsz));
}
fail:
_kvm_err(kd, kd->program, "invalid kernel virtual address");
*pa = ~0UL;
return (0);
}