i386 4/4G split.

The change makes the user and kernel address spaces on i386
independent, giving each almost the full 4G of usable virtual
addresses.  The only exceptions are one PDE at the top of the address
space, used for the trampoline and the per-CPU trampoline stacks, and
the system structures that must always be mapped, namely the IDT, GDT,
common TSS and LDT, and the process-private TSS and LDT if allocated.

Because the kernel text and data now use a 1:1 mapping, it became
possible to eliminate the assembler part of locore.S that bootstrapped
the initial page table and KPTmap.  That code is rewritten in C and
moved into pmap_cold().  The comment in vmparam.h explains the KVA
layout.
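
For illustration, a minimal sketch of what such a C-language bootstrap
looks like, mirroring the ALLOCPAGES/fillkpt assembler macros removed
from locore.s below.  The helper names and the physfree variable here
are illustrative only, not the actual pmap_cold() internals:

	/* Illustrative sketch; assumes the usual i386 pmap headers. */
	static vm_paddr_t physfree;	/* next free physical page */

	static vm_paddr_t
	allocpages(size_t npages)
	{
		vm_paddr_t ret;

		ret = physfree;
		physfree += npages * PAGE_SIZE;
		/* The 1:1 mapping lets us touch the pages directly. */
		bzero((void *)(uintptr_t)ret, npages * PAGE_SIZE);
		return (ret);
	}

	static void
	fill_ptes(pt_entry_t *base, u_int idx, vm_paddr_t pa, u_int count,
	    u_int prot)
	{
		/* Map 'count' pages starting at 'pa', like fillkpt. */
		while (count-- > 0) {
			base[idx++] = pa | PG_V | prot;
			pa += PAGE_SIZE;
		}
	}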

There is no PCID mechanism available in protected mode, so each switch
between kernel and user mode completely flushes the TLB, except for
the trampoline PTD region.  TLB invalidation for userspace becomes
trivial, because the IPI handlers switch page tables anyway.  On the
other hand, context switches no longer need to reload %cr3.
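
To make the new %cr3 discipline concrete, a simplified sketch follows.
The function names and the kernel_cr3 variable are illustrative; the
real switches happen in the trampoline assembly:

	/*
	 * Illustrative sketch only.  Without PCID, each %cr3 load
	 * below flushes the TLB, except for the trampoline PTD
	 * region described above.
	 */
	extern u_long kernel_cr3;	/* illustrative */

	static void
	enter_kernel_mode(void)
	{
		load_cr3(kernel_cr3);	/* user TLB entries are gone */
	}

	static void
	return_to_user_mode(struct pcb *pcb)
	{
		load_cr3(pcb->pcb_cr3);	/* kernel TLB entries are gone */
	}

	/*
	 * An in-kernel context switch reloads nothing: all threads
	 * share the kernel page tables while they execute in the
	 * kernel.
	 */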

copyout(9) was rewritten to use vm_fault_quick_hold().  One issue for
the new copyout(9) is compatibility with the wiring of user buffers
around sysctl handlers; this explains the two kinds of locks for the
copyout PTEs and the accounting of vslock() calls.  The
vm_fault_quick_hold() path, a.k.a. the slow path, is tried only after
the 'fast path' has failed.  The fast path temporarily changes the
mapping to the userspace and copies the data to or from a small
per-CPU buffer in the trampoline.  If a page fault occurs during the
copy, it is short-circuited by exception.s so that it never reaches C
code.
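
Schematically, the dispatch that the new primitives implement (a
sketch of copyin() from the new copyout.c below; copyin_slow() stands
in for the page-holding loop built on cp_slow0()):

	int
	copyin_sketch(const void *udaddr, void *kaddr, size_t len)
	{
		/* Reject wrap-around and non-user addresses. */
		if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
		    (uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
			return (-1);
		/* Fast path: copy via the per-CPU trampoline buffer. */
		if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
		    copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0))
			return (0);
		/* Slow path: vm_fault_quick_hold_pages() + per-CPU PTEs. */
		return (copyin_slow(udaddr, kaddr, len));
	}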

The change was motivated by the need to implement the Meltdown
mitigation, but instead of KPTI the full split is done.  The i386
architecture already shows address-space sizing problems; in
particular, it is impossible to link clang and lld with debugging
enabled.  I expect that the issues caused by the virtual address space
limits will only get worse, and the split gives the platform more
headroom.

Tested by: pho
Discussed with:	bde
Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
Differential revision:	https://reviews.freebsd.org/D14633
commit d86c1f0dc1 (parent 1315f9b59f)
Author:	Konstantin Belousov
Date:	2018-04-13 20:30:49 +00:00
49 changed files with 2572 additions and 1820 deletions


@@ -29,6 +29,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/proc.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/pcb.h>
#include <machine/frame.h>
#include <machine/segments.h>
@@ -279,12 +281,26 @@ kgdb_trgt_frame_cache(struct frame_info *next_frame, void **this_cache)
char buf[MAX_REGISTER_SIZE];
struct kgdb_frame_cache *cache;
char *pname;
CORE_ADDR pcx;
uintptr_t addr, setidt_disp;
cache = *this_cache;
if (cache == NULL) {
cache = FRAME_OBSTACK_ZALLOC(struct kgdb_frame_cache);
*this_cache = cache;
cache->pc = frame_func_unwind(next_frame);
pcx = frame_pc_unwind(next_frame);
if (pcx >= PMAP_TRM_MIN_ADDRESS) {
addr = kgdb_lookup("setidt_disp");
if (addr != 0) {
if (kvm_read(kvm, addr, &setidt_disp,
sizeof(setidt_disp)) !=
sizeof(setidt_disp))
warnx("kvm_read: %s", kvm_geterr(kvm));
else
pcx -= setidt_disp;
}
}
cache->pc = pcx;
find_pc_partial_function(cache->pc, &pname, NULL, NULL);
if (pname[0] != 'X')
cache->frame_type = FT_NORMAL;
@@ -373,6 +389,8 @@ kgdb_trgt_trapframe_sniffer(struct frame_info *next_frame)
CORE_ADDR pc;
pc = frame_pc_unwind(next_frame);
if (pc >= PMAP_TRM_MIN_ADDRESS)
return (&kgdb_trgt_trapframe_unwind);
pname = NULL;
find_pc_partial_function(pc, &pname, NULL, NULL);
if (pname == NULL)


@@ -483,6 +483,7 @@ i386/i386/atomic.c standard \
i386/i386/bios.c standard
i386/i386/bioscall.s standard
i386/i386/bpf_jit_machdep.c optional bpf_jitter
i386/i386/copyout.c standard
i386/i386/db_disasm.c optional ddb
i386/i386/db_interface.c optional ddb
i386/i386/db_trace.c optional ddb


@@ -6,7 +6,7 @@ SEARCH_DIR(/usr/lib);
SECTIONS
{
/* Read-only sections, merged into text segment: */
. = kernbase + kernload + SIZEOF_HEADERS;
. = kernbase + SIZEOF_HEADERS;
.interp : { *(.interp) }
.hash : { *(.hash) }
.gnu.hash : { *(.gnu.hash) }


@@ -109,7 +109,11 @@ dcons_crom_expose_idt(struct dcons_crom_softc *sc)
static off_t idt_paddr;
/* XXX */
#ifdef __amd64__
idt_paddr = (char *)idt - (char *)KERNBASE;
#else /* __i386__ */
idt_paddr = (off_t)pmap_kextract((vm_offset_t)idt);
#endif
crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_HI, ADDR_HI(idt_paddr));
crom_add_entry(&sc->unit, DCONS_CSR_KEY_RESET_LO, ADDR_LO(idt_paddr));


@@ -309,11 +309,16 @@ dcons_drv_init(int stage)
* Allow read/write access to dcons buffer.
*/
for (pa = trunc_page(addr); pa < addr + size; pa += PAGE_SIZE)
*vtopte(KERNBASE + pa) |= PG_RW;
*vtopte(PMAP_MAP_LOW + pa) |= PG_RW;
invltlb();
#endif
/* XXX P to V */
#ifdef __amd64__
dg.buf = (struct dcons_buf *)(vm_offset_t)(KERNBASE + addr);
#else /* __i386__ */
dg.buf = (struct dcons_buf *)((vm_offset_t)PMAP_MAP_LOW +
addr);
#endif
dg.size = size;
if (dcons_load_buffer(dg.buf, dg.size, sc) < 0)
dg.buf = NULL;


@@ -26,11 +26,12 @@
* $FreeBSD$
*/
#include "assym.inc"
#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/specialreg.h>
#include "assym.inc"
/*
* This is the Hyper-V vmbus channel direct callback interrupt.
* Only used when it is running on Hyper-V.
@@ -42,6 +43,7 @@ IDTVEC(vmbus_isr)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
call vmbus_handle_intr


@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/vmparam.h>
#include <machine/pc/bios.h>
#endif
#include <dev/ppbus/ppbconf.h>
@@ -121,7 +122,7 @@ static char *ppc_epp_protocol[] = { " (EPP 1.9)", " (EPP 1.7)", 0 };
* BIOS printer list - used by BIOS probe.
*/
#define BIOS_PPC_PORTS 0x408
#define BIOS_PORTS (short *)(KERNBASE+BIOS_PPC_PORTS)
#define BIOS_PORTS ((short *)BIOS_PADDRTOVADDR(BIOS_PPC_PORTS))
#define BIOS_MAX_PPC 4
#endif


@@ -288,7 +288,11 @@ ec_putc(int c)
* This is enough for ec_putc() to work very early on x86
* if the kernel starts in normal color text mode.
*/
#ifdef __amd64__
fb = KERNBASE + 0xb8000;
#else /* __i386__ */
fb = PMAP_MAP_LOW + 0xb8000;
#endif
xsize = 80;
ysize = 25;
#endif


@@ -894,19 +894,6 @@ options ENABLE_ALART # Control alarm on Intel intpm driver
#
options PMAP_SHPGPERPROC=201
#
# Change the size of the kernel virtual address space. Due to
# constraints in loader(8) on i386, this must be a multiple of 4.
# 256 = 1 GB of kernel address space. Increasing this also causes
# a reduction of the address space in user processes. 512 splits
# the 4GB cpu address space in half (2GB user, 2GB kernel). For PAE
# kernels, the value will need to be double non-PAE. A value of 1024
# for PAE kernels is necessary to split the address space in half.
# This will likely need to be increased to handle memory sizes >4GB.
# PAE kernels default to a value of 512.
#
options KVA_PAGES=260
#
# Number of initial kernel page table pages used for early bootstrap.
# This number should include enough pages to map the kernel, any
@@ -951,22 +938,6 @@ device ndis
#####################################################################
# VM OPTIONS
# Disable the 4 MByte page PSE CPU feature. The PSE feature allows the
# kernel to use 4 MByte pages to map the kernel instead of 4k pages.
# This saves on the amount of memory needed for page tables needed to
# map the kernel. You should only disable this feature as a temporary
# workaround if you are having problems with it enabled.
#
#options DISABLE_PSE
# Disable the global pages PGE CPU feature. The PGE feature allows pages
# to be marked with the PG_G bit. TLB entries for these pages are not
# flushed from the cache when %cr3 is reloaded. This can make context
# switches less expensive. You should only disable this feature as a
# temporary workaround if you are having problems with it enabled.
#
#options DISABLE_PG_G
# KSTACK_PAGES is the number of memory pages to assign to the kernel
# stack of each thread.


@@ -39,6 +39,7 @@
#include "opt_smp.h"
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
#include <x86/apicreg.h>
@@ -67,34 +68,39 @@ as_lapic_eoi:
* translates that into a vector, and passes the vector to the
* lapic_handle_intr() function.
*/
#define ISR_VEC(index, vec_name) \
.text ; \
SUPERALIGN_TEXT ; \
IDTVEC(vec_name ## _pti) ; \
IDTVEC(vec_name) ; \
PUSH_FRAME ; \
SET_KERNEL_SREGS ; \
cld ; \
FAKE_MCOUNT(TF_EIP(%esp)) ; \
cmpl $0,x2apic_mode ; \
je 1f ; \
movl $(MSR_APIC_ISR0 + index),%ecx ; \
rdmsr ; \
jmp 2f ; \
1: ; \
movl lapic_map, %edx ;/* pointer to local APIC */ \
movl LA_ISR + 16 * (index)(%edx), %eax ; /* load ISR */ \
2: ; \
bsrl %eax, %eax ; /* index of highest set bit in ISR */ \
jz 3f ; \
addl $(32 * index),%eax ; \
pushl %esp ; \
pushl %eax ; /* pass the IRQ */ \
call lapic_handle_intr ; \
addl $8, %esp ; /* discard parameter */ \
3: ; \
MEXITCOUNT ; \
.macro ISR_VEC index, vec_name
.text
SUPERALIGN_TEXT
.globl X\()\vec_name\()_pti, X\()\vec_name
X\()\vec_name\()_pti:
X\()\vec_name:
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
cmpl $0,x2apic_mode
je 2f
movl $(MSR_APIC_ISR0 + \index),%ecx
rdmsr
jmp 3f
2:
movl lapic_map, %edx /* pointer to local APIC */
movl LA_ISR + 16 * \index(%edx), %eax /* load ISR */
3:
bsrl %eax, %eax /* index of highest set bit in ISR */
jz 4f
addl $(32 * \index),%eax
pushl %esp
pushl %eax /* pass the IRQ */
movl $lapic_handle_intr, %eax
call *%eax
addl $8, %esp /* discard parameter */
4:
MEXITCOUNT
jmp doreti
.endm
/*
* Handle "spurious INTerrupts".
@@ -111,13 +117,13 @@ IDTVEC(spuriousint)
iret
ISR_VEC(1, apic_isr1)
ISR_VEC(2, apic_isr2)
ISR_VEC(3, apic_isr3)
ISR_VEC(4, apic_isr4)
ISR_VEC(5, apic_isr5)
ISR_VEC(6, apic_isr6)
ISR_VEC(7, apic_isr7)
ISR_VEC 1, apic_isr1
ISR_VEC 2, apic_isr2
ISR_VEC 3, apic_isr3
ISR_VEC 4, apic_isr4
ISR_VEC 5, apic_isr5
ISR_VEC 6, apic_isr6
ISR_VEC 7, apic_isr7
/*
* Local APIC periodic timer handler.
@@ -129,9 +135,11 @@ IDTVEC(timerint)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
call lapic_handle_timer
movl $lapic_handle_timer, %eax
call *%eax
add $4, %esp
MEXITCOUNT
jmp doreti
@@ -146,8 +154,10 @@ IDTVEC(cmcint)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
call lapic_handle_cmc
movl $lapic_handle_cmc, %eax
call *%eax
MEXITCOUNT
jmp doreti
@@ -161,8 +171,10 @@ IDTVEC(errorint)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
call lapic_handle_error
movl $lapic_handle_error, %eax
call *%eax
MEXITCOUNT
jmp doreti
@@ -177,9 +189,11 @@ IDTVEC(xen_intr_upcall)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
call xen_intr_handle_upcall
movl $xen_intr_handle_upcall, %eax
call *%eax
add $4, %esp
MEXITCOUNT
jmp doreti
@@ -200,9 +214,9 @@ IDTVEC(invltlb)
PUSH_FRAME
SET_KERNEL_SREGS
cld
call invltlb_handler
KENTER
movl $invltlb_handler, %eax
call *%eax
jmp invltlb_ret
/*
@@ -214,9 +228,9 @@ IDTVEC(invlpg)
PUSH_FRAME
SET_KERNEL_SREGS
cld
call invlpg_handler
KENTER
movl $invlpg_handler, %eax
call *%eax
jmp invltlb_ret
/*
@@ -228,9 +242,9 @@ IDTVEC(invlrng)
PUSH_FRAME
SET_KERNEL_SREGS
cld
call invlrng_handler
KENTER
movl $invlrng_handler, %eax
call *%eax
jmp invltlb_ret
/*
@@ -242,9 +256,9 @@ IDTVEC(invlcache)
PUSH_FRAME
SET_KERNEL_SREGS
cld
call invlcache_handler
KENTER
movl $invlcache_handler, %eax
call *%eax
jmp invltlb_ret
/*
@@ -256,12 +270,11 @@ IDTVEC(ipi_intr_bitmap_handler)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
call as_lapic_eoi
FAKE_MCOUNT(TF_EIP(%esp))
call ipi_bitmap_handler
movl $ipi_bitmap_handler, %eax
call *%eax
MEXITCOUNT
jmp doreti
@@ -274,9 +287,10 @@ IDTVEC(cpustop)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
call as_lapic_eoi
call cpustop_handler
movl $cpustop_handler, %eax
call *%eax
jmp doreti
/*
@@ -288,9 +302,10 @@ IDTVEC(cpususpend)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
call as_lapic_eoi
call cpususpend_handler
movl $cpususpend_handler, %eax
call *%eax
jmp doreti
/*
@@ -304,14 +319,14 @@ IDTVEC(rendezvous)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
#ifdef COUNT_IPIS
movl PCPU(CPUID), %eax
movl ipi_rendezvous_counts(,%eax,4), %eax
incl (%eax)
#endif
call smp_rendezvous_action
movl $smp_rendezvous_action, %eax
call *%eax
call as_lapic_eoi
jmp doreti


@@ -36,6 +36,7 @@
* master and slave interrupt controllers.
*/
#include <machine/psl.h>
#include <machine/asmacros.h>
#include "assym.inc"
@@ -43,37 +44,41 @@
/*
* Macros for interrupt entry, call to handler, and exit.
*/
#define INTR(irq_num, vec_name) \
.text ; \
SUPERALIGN_TEXT ; \
IDTVEC(vec_name ##_pti) ; \
IDTVEC(vec_name) ; \
PUSH_FRAME ; \
SET_KERNEL_SREGS ; \
cld ; \
; \
FAKE_MCOUNT(TF_EIP(%esp)) ; \
pushl %esp ; \
pushl $irq_num; /* pass the IRQ */ \
call atpic_handle_intr ; \
addl $8, %esp ; /* discard the parameters */ \
; \
MEXITCOUNT ; \
jmp doreti
.macro INTR irq_num, vec_name
.text
SUPERALIGN_TEXT
.globl X\()\vec_name\()_pti, X\()\vec_name
INTR(0, atpic_intr0)
INTR(1, atpic_intr1)
INTR(2, atpic_intr2)
INTR(3, atpic_intr3)
INTR(4, atpic_intr4)
INTR(5, atpic_intr5)
INTR(6, atpic_intr6)
INTR(7, atpic_intr7)
INTR(8, atpic_intr8)
INTR(9, atpic_intr9)
INTR(10, atpic_intr10)
INTR(11, atpic_intr11)
INTR(12, atpic_intr12)
INTR(13, atpic_intr13)
INTR(14, atpic_intr14)
INTR(15, atpic_intr15)
X\()\vec_name\()_pti:
X\()\vec_name:
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
pushl $\irq_num /* pass the IRQ */
movl $atpic_handle_intr, %eax
call *%eax
addl $8, %esp /* discard the parameters */
MEXITCOUNT
jmp doreti
.endm
INTR 0, atpic_intr0
INTR 1, atpic_intr1
INTR 2, atpic_intr2
INTR 3, atpic_intr3
INTR 4, atpic_intr4
INTR 5, atpic_intr5
INTR 6, atpic_intr6
INTR 7, atpic_intr7
INTR 8, atpic_intr8
INTR 9, atpic_intr9
INTR 10, atpic_intr10
INTR 11, atpic_intr11
INTR 12, atpic_intr12
INTR 13, atpic_intr13
INTR 14, atpic_intr14
INTR 15, atpic_intr15


@@ -305,6 +305,7 @@ set_bios_selectors(struct bios_segments *seg, int flags)
}
extern int vm86pa;
extern u_long vm86phystk;
extern void bios16_jmp(void);
/*
@@ -329,7 +330,7 @@ bios16(struct bios_args *args, char *fmt, ...)
int flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
u_int i, arg_start, arg_end;
pt_entry_t *pte;
pd_entry_t *ptd;
pd_entry_t *ptd, orig_ptd;
arg_start = 0xffffffff;
arg_end = 0;
@@ -390,27 +391,14 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
args->seg.code32.limit = 0xffff;
ptd = (pd_entry_t *)rcr3();
#if defined(PAE) || defined(PAE_TABLES)
if (ptd == IdlePDPT)
#else
if (ptd == IdlePTD)
#endif
{
/*
* no page table, so create one and install it.
*/
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE);
*pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
*ptd = vtophys(pte) | PG_RW | PG_V;
} else {
/*
* this is a user-level page table
*/
pte = PTmap;
*pte = (vm86pa - PAGE_SIZE) | PG_RW | PG_V;
}
/*
* no page table, so create one and install it.
*/
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = IdlePTD;
*pte = vm86phystk | PG_RW | PG_V;
orig_ptd = *ptd;
*ptd = vtophys(pte) | PG_RW | PG_V;
pmap_invalidate_all(kernel_pmap); /* XXX insurance for now */
stack_top = stack;
@@ -464,20 +452,12 @@ bios16(struct bios_args *args, char *fmt, ...)
i = bios16_call(&args->r, stack_top);
if (pte == PTmap) {
*pte = 0; /* remove entry */
/*
* XXX only needs to be invlpg(0) but that doesn't work on the 386
*/
pmap_invalidate_all(kernel_pmap);
} else {
*ptd = 0; /* remove page table */
/*
* XXX only needs to be invlpg(0) but that doesn't work on the 386
*/
pmap_invalidate_all(kernel_pmap);
free(pte, M_TEMP); /* ... and free it */
}
*ptd = orig_ptd; /* remove page table */
/*
* XXX only needs to be invlpg(0) but that doesn't work on the 386
*/
pmap_invalidate_all(kernel_pmap);
free(pte, M_TEMP); /* ... and free it */
return (i);
}

sys/i386/i386/copyout.c (new file, 489 lines)

@@ -0,0 +1,489 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#if defined(PAE) || defined(PAE_TABLES)
#define KCR3 ((u_int)IdlePDPT)
#else
#define KCR3 ((u_int)IdlePTD)
#endif
int copyin_fast(const void *udaddr, void *kaddr, size_t len, u_int);
static int (*copyin_fast_tramp)(const void *, void *, size_t, u_int);
int copyout_fast(const void *kaddr, void *udaddr, size_t len, u_int);
static int (*copyout_fast_tramp)(const void *, void *, size_t, u_int);
int fubyte_fast(volatile const void *base, u_int kcr3);
static int (*fubyte_fast_tramp)(volatile const void *, u_int);
int fuword16_fast(volatile const void *base, u_int kcr3);
static int (*fuword16_fast_tramp)(volatile const void *, u_int);
int fueword_fast(volatile const void *base, long *val, u_int kcr3);
static int (*fueword_fast_tramp)(volatile const void *, long *, u_int);
int subyte_fast(volatile void *base, int val, u_int kcr3);
static int (*subyte_fast_tramp)(volatile void *, int, u_int);
int suword16_fast(volatile void *base, int val, u_int kcr3);
static int (*suword16_fast_tramp)(volatile void *, int, u_int);
int suword_fast(volatile void *base, long val, u_int kcr3);
static int (*suword_fast_tramp)(volatile void *, long, u_int);
static int fast_copyout = 1;
SYSCTL_INT(_machdep, OID_AUTO, fast_copyout, CTLFLAG_RWTUN,
&fast_copyout, 0,
"");
void
copyout_init_tramp(void)
{
copyin_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
(uintptr_t)copyin_fast + setidt_disp);
copyout_fast_tramp = (int (*)(const void *, void *, size_t, u_int))(
(uintptr_t)copyout_fast + setidt_disp);
fubyte_fast_tramp = (int (*)(volatile const void *, u_int))(
(uintptr_t)fubyte_fast + setidt_disp);
fuword16_fast_tramp = (int (*)(volatile const void *, u_int))(
(uintptr_t)fuword16_fast + setidt_disp);
fueword_fast_tramp = (int (*)(volatile const void *, long *, u_int))(
(uintptr_t)fueword_fast + setidt_disp);
subyte_fast_tramp = (int (*)(volatile void *, int, u_int))(
(uintptr_t)subyte_fast + setidt_disp);
suword16_fast_tramp = (int (*)(volatile void *, int, u_int))(
(uintptr_t)suword16_fast + setidt_disp);
suword_fast_tramp = (int (*)(volatile void *, long, u_int))(
(uintptr_t)suword_fast + setidt_disp);
}
static int
cp_slow0(vm_offset_t uva, size_t len, bool write,
void (*f)(vm_offset_t, void *), void *arg)
{
struct pcpu *pc;
vm_page_t m[2];
pt_entry_t *pte;
vm_offset_t kaddr;
int error, i, plen;
bool sleepable;
plen = howmany(uva - trunc_page(uva) + len, PAGE_SIZE);
MPASS(plen <= nitems(m));
error = 0;
i = vm_fault_quick_hold_pages(&curproc->p_vmspace->vm_map, uva, len,
(write ? VM_PROT_WRITE : VM_PROT_READ) | VM_PROT_QUICK_NOFAULT,
m, nitems(m));
if (i != plen)
return (EFAULT);
sched_pin();
pc = get_pcpu();
if (!THREAD_CAN_SLEEP() || curthread->td_vslock_sz > 0 ||
(curthread->td_pflags & TDP_NOFAULTING) != 0) {
sleepable = false;
mtx_lock(&pc->pc_copyout_mlock);
kaddr = pc->pc_copyout_maddr;
} else {
sleepable = true;
sx_xlock(&pc->pc_copyout_slock);
kaddr = pc->pc_copyout_saddr;
}
for (i = 0, pte = vtopte(kaddr); i < plen; i++, pte++) {
*pte = PG_V | PG_RW | PG_A | PG_M | VM_PAGE_TO_PHYS(m[i]) |
pmap_cache_bits(pmap_page_get_memattr(m[i]), FALSE);
invlpg(kaddr + ptoa(i));
}
kaddr += uva - trunc_page(uva);
f(kaddr, arg);
sched_unpin();
if (sleepable)
sx_xunlock(&pc->pc_copyout_slock);
else
mtx_unlock(&pc->pc_copyout_mlock);
for (i = 0; i < plen; i++) {
vm_page_lock(m[i]);
vm_page_unhold(m[i]);
vm_page_unlock(m[i]);
}
return (error);
}
struct copyinstr_arg0 {
vm_offset_t kc;
size_t len;
size_t alen;
bool end;
};
static void
copyinstr_slow0(vm_offset_t kva, void *arg)
{
struct copyinstr_arg0 *ca;
char c;
ca = arg;
MPASS(ca->alen == 0 && ca->len > 0 && !ca->end);
while (ca->alen < ca->len && !ca->end) {
c = *(char *)(kva + ca->alen);
*(char *)ca->kc = c;
ca->alen++;
ca->kc++;
if (c == '\0')
ca->end = true;
}
}
int
copyinstr(const void *udaddr, void *kaddr, size_t maxlen, size_t *lencopied)
{
struct copyinstr_arg0 ca;
vm_offset_t uc;
size_t plen;
int error;
error = 0;
ca.end = false;
for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
plen < maxlen && !ca.end; uc += ca.alen, plen += ca.alen) {
ca.len = round_page(uc) - uc;
if (ca.len == 0)
ca.len = PAGE_SIZE;
if (plen + ca.len > maxlen)
ca.len = maxlen - plen;
ca.alen = 0;
if (cp_slow0(uc, ca.len, false, copyinstr_slow0, &ca) != 0) {
error = EFAULT;
break;
}
}
if (!ca.end && plen == maxlen && error == 0)
error = ENAMETOOLONG;
if (lencopied != NULL)
*lencopied = plen;
return (error);
}
struct copyin_arg0 {
vm_offset_t kc;
size_t len;
};
static void
copyin_slow0(vm_offset_t kva, void *arg)
{
struct copyin_arg0 *ca;
ca = arg;
bcopy((void *)kva, (void *)ca->kc, ca->len);
}
int
copyin(const void *udaddr, void *kaddr, size_t len)
{
struct copyin_arg0 ca;
vm_offset_t uc;
size_t plen;
if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
(uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
return (-1);
if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
copyin_fast_tramp(udaddr, kaddr, len, KCR3) == 0))
return (0);
for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
ca.len = round_page(uc) - uc;
if (ca.len == 0)
ca.len = PAGE_SIZE;
if (plen + ca.len > len)
ca.len = len - plen;
if (cp_slow0(uc, ca.len, false, copyin_slow0, &ca) != 0)
return (EFAULT);
}
return (0);
}
static void
copyout_slow0(vm_offset_t kva, void *arg)
{
struct copyin_arg0 *ca;
ca = arg;
bcopy((void *)ca->kc, (void *)kva, ca->len);
}
int
copyout(const void *kaddr, void *udaddr, size_t len)
{
struct copyin_arg0 ca;
vm_offset_t uc;
size_t plen;
if ((uintptr_t)udaddr + len < (uintptr_t)udaddr ||
(uintptr_t)udaddr + len > VM_MAXUSER_ADDRESS)
return (-1);
if (len == 0 || (fast_copyout && len <= TRAMP_COPYOUT_SZ &&
copyout_fast_tramp(kaddr, udaddr, len, KCR3) == 0))
return (0);
for (plen = 0, uc = (vm_offset_t)udaddr, ca.kc = (vm_offset_t)kaddr;
plen < len; uc += ca.len, ca.kc += ca.len, plen += ca.len) {
ca.len = round_page(uc) - uc;
if (ca.len == 0)
ca.len = PAGE_SIZE;
if (plen + ca.len > len)
ca.len = len - plen;
if (cp_slow0(uc, ca.len, true, copyout_slow0, &ca) != 0)
return (EFAULT);
}
return (0);
}
/*
* Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
* memory.
*/
static void
fubyte_slow0(vm_offset_t kva, void *arg)
{
*(int *)arg = *(u_char *)kva;
}
int
fubyte(volatile const void *base)
{
int res;
if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
(uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout) {
res = fubyte_fast_tramp(base, KCR3);
if (res != -1)
return (res);
}
if (cp_slow0((vm_offset_t)base, sizeof(char), false, fubyte_slow0,
&res) != 0)
return (-1);
return (res);
}
static void
fuword16_slow0(vm_offset_t kva, void *arg)
{
*(int *)arg = *(uint16_t *)kva;
}
int
fuword16(volatile const void *base)
{
int res;
if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
(uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout) {
res = fuword16_fast_tramp(base, KCR3);
if (res != -1)
return (res);
}
if (cp_slow0((vm_offset_t)base, sizeof(uint16_t), false,
fuword16_slow0, &res) != 0)
return (-1);
return (res);
}
static void
fueword_slow0(vm_offset_t kva, void *arg)
{
*(uint32_t *)arg = *(uint32_t *)kva;
}
int
fueword(volatile const void *base, long *val)
{
uint32_t res;
if ((uintptr_t)base + sizeof(*val) < (uintptr_t)base ||
(uintptr_t)base + sizeof(*val) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout) {
if (fueword_fast_tramp(base, val, KCR3) == 0)
return (0);
}
if (cp_slow0((vm_offset_t)base, sizeof(long), false, fueword_slow0,
&res) != 0)
return (-1);
*val = res;
return (0);
}
int
fueword32(volatile const void *base, int32_t *val)
{
return (fueword(base, (long *)val));
}
/*
* Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
*/
static void
subyte_slow0(vm_offset_t kva, void *arg)
{
*(u_char *)kva = *(int *)arg;
}
int
subyte(volatile void *base, int byte)
{
if ((uintptr_t)base + sizeof(uint8_t) < (uintptr_t)base ||
(uintptr_t)base + sizeof(uint8_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout && subyte_fast_tramp(base, byte, KCR3) == 0)
return (0);
return (cp_slow0((vm_offset_t)base, sizeof(u_char), true, subyte_slow0,
&byte) != 0 ? -1 : 0);
}
static void
suword16_slow0(vm_offset_t kva, void *arg)
{
*(int *)kva = *(uint16_t *)arg;
}
int
suword16(volatile void *base, int word)
{
if ((uintptr_t)base + sizeof(uint16_t) < (uintptr_t)base ||
(uintptr_t)base + sizeof(uint16_t) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout && suword16_fast_tramp(base, word, KCR3) == 0)
return (0);
return (cp_slow0((vm_offset_t)base, sizeof(int16_t), true,
suword16_slow0, &word) != 0 ? -1 : 0);
}
static void
suword_slow0(vm_offset_t kva, void *arg)
{
*(int *)kva = *(uint32_t *)arg;
}
int
suword(volatile void *base, long word)
{
if ((uintptr_t)base + sizeof(word) < (uintptr_t)base ||
(uintptr_t)base + sizeof(word) > VM_MAXUSER_ADDRESS)
return (-1);
if (fast_copyout && suword_fast_tramp(base, word, KCR3) == 0)
return (0);
return (cp_slow0((vm_offset_t)base, sizeof(long), true,
suword_slow0, &word) != 0 ? -1 : 0);
}
int
suword32(volatile void *base, int32_t word)
{
return (suword(base, word));
}
struct casueword_arg0 {
uint32_t oldval;
uint32_t newval;
};
static void
casueword_slow0(vm_offset_t kva, void *arg)
{
struct casueword_arg0 *ca;
ca = arg;
atomic_fcmpset_int((u_int *)kva, &ca->oldval, ca->newval);
}
int
casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
uint32_t newval)
{
struct casueword_arg0 ca;
int res;
ca.oldval = oldval;
ca.newval = newval;
res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true,
casueword_slow0, &ca);
if (res == 0) {
*oldvalp = ca.oldval;
return (0);
}
return (-1);
}
int
casueword(volatile u_long *base, u_long oldval, u_long *oldvalp, u_long newval)
{
struct casueword_arg0 ca;
int res;
ca.oldval = oldval;
ca.newval = newval;
res = cp_slow0((vm_offset_t)base, sizeof(int32_t), true,
casueword_slow0, &ca);
if (res == 0) {
*oldvalp = ca.oldval;
return (0);
}
return (-1);
}


@@ -0,0 +1,362 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Konstantin Belousov <kib@FreeBSD.org>
* under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <machine/asmacros.h>
#include <machine/cputypes.h>
#include <machine/pmap.h>
#include <machine/specialreg.h>
#include "assym.inc"
.text
ENTRY(copyout_fast)
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %edi
pushl %ebx
movl $copyout_fault,%edx
movl 20(%ebp),%ebx /* KCR3 */
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%edi
cli
movl PCPU(TRAMPSTK),%esi
movl PCPU(COPYOUT_BUF),%eax
subl $4,%esi
movl %eax,(%esi)
movl 12(%ebp),%eax /* udaddr */
subl $4,%esi
movl %eax,(%esi)
movl 16(%ebp),%eax /* len */
subl $4,%esi
movl %eax,(%esi)
subl $4, %esi
movl %edi, (%esi)
movl 8(%ebp),%eax /* kaddr */
subl $4,%esi
movl %eax,(%esi)
movl PCPU(COPYOUT_BUF),%eax
subl $4,%esi
movl %eax,(%esi)
movl 16(%ebp),%eax /* len */
subl $4,%esi
movl %eax,(%esi)
movl %esp,%eax
movl %esi,%esp
/* bcopy(%esi = kaddr, %edi = PCPU(copyout_buf), %ecx = len) */
popl %ecx
popl %edi
popl %esi
rep; movsb
popl %edi
movl %edi,%cr3
/* bcopy(%esi = PCPU(copyout_buf), %edi = udaddr, %ecx = len) */
popl %ecx
popl %edi
popl %esi
rep; movsb
movl %ebx,%cr3
movl %eax,%esp
sti
xorl %eax,%eax
popl %ebx
popl %edi
popl %esi
leave
ret
END(copyout_fast)
ENTRY(copyin_fast)
pushl %ebp
movl %esp, %ebp
pushl %esi
pushl %edi
pushl %ebx
movl $copyout_fault,%edx
movl 20(%ebp),%ebx /* KCR3 */
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%edi
cli
movl PCPU(TRAMPSTK),%esi
movl PCPU(COPYOUT_BUF),%eax
subl $4,%esi
movl %eax,(%esi)
movl 12(%ebp),%eax /* kaddr */
subl $4,%esi
movl %eax,(%esi)
movl 16(%ebp),%eax /* len */
subl $4,%esi
movl %eax,(%esi)
movl 8(%ebp),%eax /* udaddr */
subl $4,%esi
movl %eax,(%esi)
movl PCPU(COPYOUT_BUF),%eax
subl $4,%esi
movl %eax,(%esi)
movl 16(%ebp),%eax /* len */
subl $4,%esi
movl %eax,(%esi)
movl %esp,%eax
movl %esi,%esp
movl %edi,%cr3
/* bcopy(%esi = udaddr, %edi = PCPU(copyout_buf), %ecx = len) */
popl %ecx
popl %edi
popl %esi
rep; movsb
movl %ebx,%cr3
/* bcopy(%esi = PCPU(copyout_buf), %edi = kaddr, %ecx = len) */
popl %ecx
popl %edi
popl %esi
rep; movsb
movl %eax,%esp
sti
xorl %eax,%eax
popl %ebx
popl %edi
popl %esi
leave
ret
END(copyin_fast)
ALIGN_TEXT
copyout_fault:
movl %eax,%esp
sti
movl $EFAULT,%eax
popl %ebx
popl %edi
popl %esi
leave
ret
ENTRY(fueword_fast)
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %esi
pushl %edi
movl 8(%ebp),%ecx /* from */
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%eax
movl $fusufault,%edx
movl 16(%ebp),%ebx
movl %esp,%esi
cli
movl PCPU(TRAMPSTK),%esp
movl %eax,%cr3
movl (%ecx),%eax
movl %ebx,%cr3
movl %esi,%esp
sti
movl 12(%ebp),%edx
movl %eax,(%edx)
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
leave
ret
END(fueword_fast)
ENTRY(fuword16_fast)
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %esi
pushl %edi
movl 8(%ebp),%ecx /* from */
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%eax
movl $fusufault,%edx
movl 12(%ebp),%ebx
movl %esp,%esi
cli
movl PCPU(TRAMPSTK),%esp
movl %eax,%cr3
movzwl (%ecx),%eax
movl %ebx,%cr3
movl %esi,%esp
sti
popl %edi
popl %esi
popl %ebx
leave
ret
END(fuword16_fast)
ENTRY(fubyte_fast)
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %esi
pushl %edi
movl 8(%ebp),%ecx /* from */
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%eax
movl $fusufault,%edx
movl 12(%ebp),%ebx
movl %esp,%esi
cli
movl PCPU(TRAMPSTK),%esp
movl %eax,%cr3
movzbl (%ecx),%eax
movl %ebx,%cr3
movl %esi,%esp
sti
popl %edi
popl %esi
popl %ebx
leave
ret
END(fubyte_fast)
ALIGN_TEXT
fusufault:
movl %esi,%esp
sti
xorl %eax,%eax
decl %eax
popl %edi
popl %esi
popl %ebx
leave
ret
ENTRY(suword_fast)
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %esi
pushl %edi
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%eax
movl $fusufault,%edx
movl 8(%ebp),%ecx /* to */
movl 12(%ebp),%edi /* val */
movl 16(%ebp),%ebx
movl %esp,%esi
cli
movl PCPU(TRAMPSTK),%esp
movl %eax,%cr3
movl %edi,(%ecx)
movl %ebx,%cr3
movl %esi,%esp
sti
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
leave
ret
END(suword_fast)
ENTRY(suword16_fast)
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %esi
pushl %edi
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%eax
movl $fusufault,%edx
movl 8(%ebp),%ecx /* to */
movl 12(%ebp),%edi /* val */
movl 16(%ebp),%ebx
movl %esp,%esi
cli
movl PCPU(TRAMPSTK),%esp
movl %eax,%cr3
movw %di,(%ecx)
movl %ebx,%cr3
movl %esi,%esp
sti
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
leave
ret
END(suword16_fast)
ENTRY(subyte_fast)
pushl %ebp
movl %esp,%ebp
pushl %ebx
pushl %esi
pushl %edi
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax),%eax
movl $fusufault,%edx
movl 8(%ebp),%ecx /* to */
movl 12(%ebp),%edi /* val */
movl 16(%ebp),%ebx
movl %esp,%esi
cli
movl PCPU(TRAMPSTK),%esp
movl %eax,%cr3
movl %edi,%eax
movb %al,(%ecx)
movl %ebx,%cr3
movl %esi,%esp
sti
xorl %eax,%eax
popl %edi
popl %esi
popl %ebx
leave
ret
END(subyte_fast)


@@ -115,4 +115,7 @@ db_show_mdpcpu(struct pcpu *pc)
db_printf("APIC ID = %d\n", pc->pc_apic_id);
db_printf("currentldt = 0x%x\n", pc->pc_currentldt);
db_printf("trampstk = 0x%x\n", pc->pc_trampstk);
db_printf("kesp0 = 0x%x\n", pc->pc_kesp0);
db_printf("common_tssp = 0x%x\n", (u_int)pc->pc_common_tssp);
}


@@ -317,7 +317,12 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td)
* actually made the call.
*/
frame_type = NORMAL;
sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset);
if (eip >= PMAP_TRM_MIN_ADDRESS) {
sym = db_search_symbol(eip - 1 - setidt_disp, DB_STGY_ANY,
&offset);
} else {
sym = db_search_symbol(eip - 1, DB_STGY_ANY, &offset);
}
db_symbol_values(sym, &name, NULL);
if (name != NULL) {
if (strcmp(name, "calltrap") == 0 ||
@@ -357,9 +362,9 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td)
* switch to a known good state.
*/
if (frame_type == DOUBLE_FAULT) {
esp = PCPU_GET(common_tss.tss_esp);
eip = PCPU_GET(common_tss.tss_eip);
ebp = PCPU_GET(common_tss.tss_ebp);
esp = PCPU_GET(common_tssp)->tss_esp;
eip = PCPU_GET(common_tssp)->tss_eip;
ebp = PCPU_GET(common_tssp)->tss_ebp;
db_printf(
"--- trap 0x17, eip = %#r, esp = %#r, ebp = %#r ---\n",
eip, esp, ebp);
@@ -379,30 +384,41 @@ db_nextframe(struct i386_frame **fp, db_addr_t *ip, struct thread *td)
else
tf = (struct trapframe *)((int)*fp + 12);
if (INKERNEL((int) tf)) {
esp = get_esp(tf);
eip = tf->tf_eip;
ebp = tf->tf_ebp;
switch (frame_type) {
case TRAP:
db_printf("--- trap %#r", tf->tf_trapno);
break;
case SYSCALL:
db_printf("--- syscall");
decode_syscall(tf->tf_eax, td);
break;
case TRAP_TIMERINT:
case TRAP_INTERRUPT:
case INTERRUPT:
db_printf("--- interrupt");
break;
default:
panic("The moon has moved again.");
}
db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip,
esp, ebp);
esp = get_esp(tf);
eip = tf->tf_eip;
ebp = tf->tf_ebp;
switch (frame_type) {
case TRAP:
db_printf("--- trap %#r", tf->tf_trapno);
break;
case SYSCALL:
db_printf("--- syscall");
decode_syscall(tf->tf_eax, td);
break;
case TRAP_TIMERINT:
case TRAP_INTERRUPT:
case INTERRUPT:
db_printf("--- interrupt");
break;
default:
panic("The moon has moved again.");
}
db_printf(", eip = %#r, esp = %#r, ebp = %#r ---\n", eip, esp, ebp);
switch (frame_type) {
case TRAP:
case TRAP_TIMERINT:
case TRAP_INTERRUPT:
case INTERRUPT:
if ((tf->tf_eflags & PSL_VM) != 0 ||
(tf->tf_cs & SEL_RPL_MASK) != 0)
ebp = 0;
break;
case SYSCALL:
ebp = 0;
break;
}
*ip = (db_addr_t) eip;
*fp = (struct i386_frame *) ebp;
}
@@ -432,6 +448,10 @@ db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
return (0);
}
/* 'frame' can be null initially. Just print the pc then. */
if (frame == NULL)
goto out;
/*
* If an indirect call via an invalid pointer caused a trap,
* %pc contains the invalid address while the return address
@@ -540,15 +560,20 @@ db_backtrace(struct thread *td, struct trapframe *tf, struct i386_frame *frame,
db_nextframe(&frame, &pc, td);
if (INKERNEL((int)pc) && !INKERNEL((int) frame)) {
out:
/*
* 'frame' can be null here, either because it was initially
* null or because db_nextframe() found no frame.
* db_nextframe() may also have found a non-kernel frame.
* !INKERNEL() classifies both. Stop tracing if either,
* after printing the pc if it is the kernel.
*/
if (frame == NULL || frame <= actframe) {
sym = db_search_symbol(pc, DB_STGY_ANY, &offset);
db_symbol_values(sym, &name, NULL);
db_print_stack_entry(name, 0, 0, 0, pc, frame);
break;
}
if (!INKERNEL((int) frame)) {
break;
}
}
return (0);


@@ -137,7 +137,6 @@ SYSINIT(kelf32, SI_SUB_EXEC, SI_ORDER_ANY,
(sysinit_cfunc_t) elf32_insert_brand_entry,
&kfreebsd_brand_info);
void
elf32_dump_thread(struct thread *td, void *dst, size_t *off)
{


@@ -1,11 +1,13 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
* Copyright (c) 2007 The FreeBSD Foundation
* Copyright (c) 2007, 2018 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
* Portions of this software were developed by Konstantin Belousov
* <kib@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -38,14 +40,11 @@
#include "opt_atpic.h"
#include "opt_hwpmc_hooks.h"
#include <machine/asmacros.h>
#include <machine/psl.h>
#include <machine/trap.h>
#include "assym.inc"
#define SEL_RPL_MASK 0x0003
#define GSEL_KPL 0x0020 /* GSEL(GCODE_SEL, SEL_KPL) */
#include <machine/psl.h>
#include <machine/asmacros.h>
#include <machine/trap.h>
#ifdef KDTRACE_HOOKS
.bss
@@ -63,20 +62,19 @@ dtrace_invop_calltrap_addr:
.zero 8
#endif
.text
#ifdef HWPMC_HOOKS
ENTRY(start_exceptions)
#endif
ENTRY(start_exceptions)
.globl tramp_idleptd
tramp_idleptd: .long 0
/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
/*
* Trap and fault vector routines.
*
* Most traps are 'trap gates', SDT_SYS386TGT. A trap gate pushes state on
* the stack that mostly looks like an interrupt, but does not disable
* interrupts. A few of the traps we are use are interrupt gates,
* SDT_SYS386IGT, which are nearly the same thing except interrupts are
* disabled on entry.
* All traps are 'interrupt gates', SDT_SYS386IGT. Interrupts are disabled
* by hardware to not allow interrupts until code switched to the kernel
* address space and the kernel thread stack.
*
* The cpu will push a certain amount of state onto the kernel stack for
* the current process. The amount of state depends on the type of trap
@@ -92,6 +90,10 @@ dtrace_invop_calltrap_addr:
* must restore them prior to calling 'iret'. The cpu adjusts the %cs and
* %ss segment registers, but does not mess with %ds, %es, or %fs. Thus we
* must load them with appropriate values for supervisor mode operation.
*
* This code is not executed at the linked address, it is copied to the
* trampoline area. As the consequence, all code there and in included files
* must be PIC.
*/
MCOUNT_LABEL(user)
@@ -103,8 +105,6 @@ IDTVEC(div)
pushl $0; TRAP(T_DIVIDE)
IDTVEC(dbg)
pushl $0; TRAP(T_TRCTRAP)
IDTVEC(nmi)
pushl $0; TRAP(T_NMI)
IDTVEC(bpt)
pushl $0; TRAP(T_BPTFLT)
IDTVEC(dtrace_ret)
@@ -124,15 +124,23 @@ IDTVEC(fpusegm)
IDTVEC(tss)
TRAP(T_TSSFLT)
IDTVEC(missing)
TRAP(T_SEGNPFLT)
pushl $T_SEGNPFLT
jmp irettraps
IDTVEC(stk)
TRAP(T_STKFLT)
pushl $T_STKFLT
jmp irettraps
IDTVEC(prot)
TRAP(T_PROTFLT)
pushl $T_PROTFLT
jmp irettraps
IDTVEC(page)
TRAP(T_PAGEFLT)
IDTVEC(mchk)
pushl $0; TRAP(T_MCHK)
cmpl $PMAP_TRM_MIN_ADDRESS, TF_EIP-TF_ERR(%esp)
jb 1f
movl %ebx, %cr3
movl %edx, TF_EIP-TF_ERR(%esp)
addl $4, %esp
iret
1: pushl $T_PAGEFLT
jmp alltraps
IDTVEC(rsvd_pti)
IDTVEC(rsvd)
pushl $0; TRAP(T_RESERVED)
@@ -144,7 +152,8 @@ IDTVEC(xmm)
pushl $0; TRAP(T_XMMFLT)
/*
* All traps except ones for syscalls jump to alltraps. If
* All traps except ones for syscalls or invalid segment,
* jump to alltraps. If
* interrupts were enabled when the trap occurred, then interrupts
* are enabled now if the trap was through a trap gate, else
* disabled if the trap was through an interrupt gate. Note that
@@ -156,20 +165,16 @@ IDTVEC(xmm)
.globl alltraps
.type alltraps,@function
alltraps:
pushal
pushl $0
movw %ds,(%esp)
pushl $0
movw %es,(%esp)
pushl $0
movw %fs,(%esp)
PUSH_FRAME2
alltraps_with_regs_pushed:
SET_KERNEL_SREGS
cld
KENTER
FAKE_MCOUNT(TF_EIP(%esp))
calltrap:
pushl %esp
call trap
movl $trap,%eax
call *%eax
add $4, %esp
/*
@@ -178,28 +183,84 @@ calltrap:
MEXITCOUNT
jmp doreti
.globl irettraps
.type irettraps,@function
irettraps:
testl $PSL_VM, TF_EFLAGS-TF_TRAPNO(%esp)
jnz alltraps
testb $SEL_RPL_MASK, TF_CS-TF_TRAPNO(%esp)
jnz alltraps
/*
* Kernel mode.
* The special case there is the kernel mode with user %cr3 and
* trampoline stack. We need to copy both current frame and the
* hardware portion of the frame we tried to return to, to the
* normal stack. This logic must follow the stack unwind order
* in doreti.
*/
PUSH_FRAME2
SET_KERNEL_SREGS
cld
call 1f
1: popl %ebx
leal (doreti_iret - 1b)(%ebx), %edx
cmpl %edx, TF_EIP(%esp)
jne 2f
movl $(2 * TF_SZ - TF_EIP), %ecx
jmp 6f
2: leal (doreti_popl_ds - 1b)(%ebx), %edx
cmpl %edx, TF_EIP(%esp)
jne 3f
movl $(2 * TF_SZ - TF_DS), %ecx
jmp 6f
3: leal (doreti_popl_es - 1b)(%ebx), %edx
cmpl %edx, TF_EIP(%esp)
jne 4f
movl $(2 * TF_SZ - TF_ES), %ecx
jmp 6f
4: leal (doreti_popl_fs - 1b)(%ebx), %edx
cmpl %edx, TF_EIP(%esp)
jne 5f
movl $(2 * TF_SZ - TF_FS), %ecx
jmp 6f
/* kernel mode, normal */
5: FAKE_MCOUNT(TF_EIP(%esp))
jmp calltrap
6: cmpl $PMAP_TRM_MIN_ADDRESS, %esp /* trampoline stack ? */
jb 5b /* if not, no need to change stacks */
movl (tramp_idleptd - 1b)(%ebx), %eax
movl %eax, %cr3
movl PCPU(KESP0), %edx
subl %ecx, %edx
movl %edx, %edi
movl %esp, %esi
rep; movsb
movl %edx, %esp
FAKE_MCOUNT(TF_EIP(%esp))
jmp calltrap
/*
* Privileged instruction fault.
*/
#ifdef KDTRACE_HOOKS
SUPERALIGN_TEXT
IDTVEC(ill)
/*
* Check if a DTrace hook is registered. The default (data) segment
* cannot be used for this since %ds is not known good until we
* verify that the entry was from kernel mode.
*/
cmpl $0,%ss:dtrace_invop_jump_addr
je norm_ill
/*
* Check if this is a user fault. If so, just handle it as a normal
* trap.
*/
cmpl $GSEL_KPL, 4(%esp) /* Check the code segment */
jne norm_ill
testl $PSL_VM, 8(%esp) /* and vm86 mode. */
jnz norm_ill
cmpl $GSEL_KPL, 4(%esp) /* Check the code segment */
jne norm_ill
/*
* Check if a DTrace hook is registered. The trampoline cannot
* be instrumented.
*/
cmpl $0, dtrace_invop_jump_addr
je norm_ill
/*
* This is a kernel instruction fault that might have been caused
@@ -221,47 +282,43 @@ IDTVEC(ill)
* Process the instruction fault in the normal way.
*/
norm_ill:
pushl $0
TRAP(T_PRIVINFLT)
pushl $0
pushl $T_PRIVINFLT
jmp alltraps
#endif
/*
* Call gate entry for syscalls (lcall 7,0).
* This is used by FreeBSD 1.x a.out executables and "old" NetBSD executables.
*
* The intersegment call has been set up to specify one dummy parameter.
* This leaves a place to put eflags so that the call frame can be
* converted to a trap frame. Note that the eflags is (semi-)bogusly
* pushed into (what will be) tf_err and then copied later into the
* final spot. It has to be done this way because esp can't be just
* temporarily altered for the pushfl - an interrupt might come in
* and clobber the saved cs/eip.
*/
SUPERALIGN_TEXT
IDTVEC(lcall_syscall)
pushfl /* save eflags */
popl 8(%esp) /* shuffle into tf_eflags */
pushl $7 /* sizeof "lcall 7,0" */
pushl $0 /* tf_trapno */
pushal
IDTVEC(mchk)
pushl $0
movw %ds,(%esp)
pushl $T_MCHK
jmp nmi_mchk_common
IDTVEC(nmi)
pushl $0
movw %es,(%esp)
pushl $0
movw %fs,(%esp)
pushl $T_NMI
nmi_mchk_common:
PUSH_FRAME2
SET_KERNEL_SREGS
cld
/*
* Save %cr3 into tf_err. There is no good place to put it.
* Always reload %cr3, since we might have interrupted the
* kernel entry or exit.
* Do not switch to the thread kernel stack, otherwise we might
* obliterate the previous context partially copied from the
* trampoline stack.
*/
movl %cr3, %eax
movl %eax, TF_ERR(%esp)
call 1f
1: popl %eax
movl (tramp_idleptd - 1b)(%eax), %eax
movl %eax, %cr3
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
call syscall
add $4, %esp
MEXITCOUNT
jmp doreti
jmp calltrap
/*
* Trap gate entry for syscalls (int 0x80).
* This is used by FreeBSD ELF executables, "new" NetBSD executables, and all
* This is used by FreeBSD ELF executables, "new" a.out executables, and all
* Linux executables.
*
* Even though the name says 'int0x80', this is actually a trap gate, not an
@@ -272,18 +329,15 @@ IDTVEC(lcall_syscall)
IDTVEC(int0x80_syscall)
pushl $2 /* sizeof "int 0x80" */
pushl $0 /* tf_trapno */
pushal
pushl $0
movw %ds,(%esp)
pushl $0
movw %es,(%esp)
pushl $0
movw %fs,(%esp)
PUSH_FRAME2
SET_KERNEL_SREGS
cld
MOVE_STACKS
sti
FAKE_MCOUNT(TF_EIP(%esp))
pushl %esp
call syscall
movl $syscall, %eax
call *%eax
add $4, %esp
MEXITCOUNT
jmp doreti
@@ -292,7 +346,8 @@ ENTRY(fork_trampoline)
pushl %esp /* trapframe pointer */
pushl %ebx /* arg1 */
pushl %esi /* function */
call fork_exit
movl $fork_exit, %eax
call *%eax
addl $12,%esp
/* cut from syscall */
@@ -343,6 +398,8 @@ MCOUNT_LABEL(bintr)
.text
MCOUNT_LABEL(eintr)
#include <i386/i386/copyout_fast.s>
/*
* void doreti(struct trapframe)
*
@@ -375,7 +432,7 @@ doreti_next:
movl PCPU(CURPCB),%ecx
testl $PCB_VM86CALL,PCB_FLAGS(%ecx)
jz doreti_ast
jmp doreti_exit
jmp doreti_popl_fs
doreti_notvm86:
testb $SEL_RPL_MASK,TF_CS(%esp) /* are we returning to user mode? */
@@ -393,7 +450,8 @@ doreti_ast:
je doreti_exit
sti
pushl %esp /* pass a pointer to the trapframe */
call ast
movl $ast, %eax
call *%eax
add $4,%esp
jmp doreti_ast
@@ -407,6 +465,23 @@ doreti_ast:
doreti_exit:
MEXITCOUNT
cmpl $T_NMI, TF_TRAPNO(%esp)
je doreti_iret_nmi
cmpl $T_MCHK, TF_TRAPNO(%esp)
je doreti_iret_nmi
testl $SEL_RPL_MASK, TF_CS(%esp)
jz doreti_popl_fs
movl %esp, %esi
movl PCPU(TRAMPSTK), %edx
movl $TF_SZ, %ecx
subl %ecx, %edx
movl %edx, %edi
rep; movsb
movl %edx, %esp
movl PCPU(CURPCB),%eax
movl PCB_CR3(%eax), %eax
movl %eax, %cr3
.globl doreti_popl_fs
doreti_popl_fs:
popl %fs
@@ -422,6 +497,11 @@ doreti_popl_ds:
doreti_iret:
iret
doreti_iret_nmi:
movl TF_ERR(%esp), %eax
movl %eax, %cr3
jmp doreti_popl_fs
/*
* doreti_iret_fault and friends. Alternative return code for
* the case where we get a fault in the doreti_exit code
@@ -440,7 +520,8 @@ doreti_iret:
ALIGN_TEXT
.globl doreti_iret_fault
doreti_iret_fault:
subl $8,%esp
pushl $0 /* tf_err */
pushl $0 /* tf_trapno XXXKIB: provide more useful value ? */
pushal
pushl $0
movw %ds,(%esp)
@@ -460,10 +541,10 @@ doreti_popl_es_fault:
doreti_popl_fs_fault:
testb $SEL_RPL_MASK,TF_CS-TF_FS(%esp)
jz doreti_popl_fs_kfault
sti
movl $0,TF_ERR(%esp) /* XXX should be the error code */
movl $T_PROTFLT,TF_TRAPNO(%esp)
jmp alltraps_with_regs_pushed
SET_KERNEL_SREGS
jmp calltrap
doreti_popl_ds_kfault:
movl $0,(%esp)
@@ -474,7 +555,7 @@ doreti_popl_es_kfault:
doreti_popl_fs_kfault:
movl $0,(%esp)
jmp doreti_popl_fs
#ifdef HWPMC_HOOKS
doreti_nmi:
/*
@@ -482,6 +563,8 @@ doreti_nmi:
* was from user mode and if so whether the current thread
* needs a user call chain capture.
*/
testl $PSL_VM, TF_EFLAGS(%esp)
jnz doreti_exit
testb $SEL_RPL_MASK,TF_CS(%esp)
jz doreti_exit
movl PCPU(CURTHREAD),%eax /* curthread present? */
@@ -489,12 +572,21 @@ doreti_nmi:
jz doreti_exit
testl $TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */
jz doreti_exit
/*
* Switch to thread stack. Reset tf_trapno to not indicate NMI,
* to cause normal userspace exit.
*/
movl $T_RESERVED, TF_TRAPNO(%esp)
NMOVE_STACKS
/*
* Take the processor out of NMI mode by executing a fake "iret".
*/
pushfl
pushl %cs
pushl $outofnmi
call 1f
1: popl %eax
leal (outofnmi-1b)(%eax),%eax
pushl %eax
iret
outofnmi:
/*
@@ -511,5 +603,6 @@ outofnmi:
call *%ecx
addl $12,%esp
jmp doreti_ast
ENTRY(end_exceptions)
#endif
ENTRY(end_exceptions)


@@ -74,6 +74,7 @@ __FBSDID("$FreeBSD$");
#include <x86/apicreg.h>
#endif
#include <machine/cpu.h>
#include <machine/pcb_ext.h>
#include <machine/pcb.h>
#include <machine/sigframe.h>
#include <machine/vm86.h>
@@ -141,6 +142,8 @@ ASSYM(PCB_DR7, offsetof(struct pcb, pcb_dr7));
ASSYM(PCB_DBREGS, PCB_DBREGS);
ASSYM(PCB_EXT, offsetof(struct pcb, pcb_ext));
ASSYM(PCB_EXT_TSS, offsetof(struct pcb_ext, ext_tss));
ASSYM(PCB_FSD, offsetof(struct pcb, pcb_fsd));
ASSYM(PCB_GSD, offsetof(struct pcb, pcb_gsd));
ASSYM(PCB_VM86, offsetof(struct pcb, pcb_vm86));
@@ -164,6 +167,7 @@ ASSYM(TF_ERR, offsetof(struct trapframe, tf_err));
ASSYM(TF_EIP, offsetof(struct trapframe, tf_eip));
ASSYM(TF_CS, offsetof(struct trapframe, tf_cs));
ASSYM(TF_EFLAGS, offsetof(struct trapframe, tf_eflags));
ASSYM(TF_SZ, sizeof(struct trapframe));
ASSYM(SIGF_HANDLER, offsetof(struct sigframe, sf_ahu.sf_handler));
#ifdef COMPAT_43
@@ -206,7 +210,7 @@ ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
ASSYM(PC_FPCURTHREAD, offsetof(struct pcpu, pc_fpcurthread));
ASSYM(PC_IDLETHREAD, offsetof(struct pcpu, pc_idlethread));
ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
ASSYM(PC_COMMON_TSS, offsetof(struct pcpu, pc_common_tss));
ASSYM(PC_COMMON_TSSP, offsetof(struct pcpu, pc_common_tssp));
ASSYM(PC_COMMON_TSSD, offsetof(struct pcpu, pc_common_tssd));
ASSYM(PC_TSS_GDT, offsetof(struct pcpu, pc_tss_gdt));
ASSYM(PC_FSGS_GDT, offsetof(struct pcpu, pc_fsgs_gdt));
@@ -214,6 +218,9 @@ ASSYM(PC_CURRENTLDT, offsetof(struct pcpu, pc_currentldt));
ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
ASSYM(PC_CURPMAP, offsetof(struct pcpu, pc_curpmap));
ASSYM(PC_PRIVATE_TSS, offsetof(struct pcpu, pc_private_tss));
ASSYM(PC_KESP0, offsetof(struct pcpu, pc_kesp0));
ASSYM(PC_TRAMPSTK, offsetof(struct pcpu, pc_trampstk));
ASSYM(PC_COPYOUT_BUF, offsetof(struct pcpu, pc_copyout_buf));
#ifdef DEV_APIC
ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
@@ -227,6 +234,10 @@ ASSYM(KPSEL, GSEL(GPRIV_SEL, SEL_KPL));
ASSYM(BC32SEL, GSEL(GBIOSCODE32_SEL, SEL_KPL));
ASSYM(GPROC0_SEL, GPROC0_SEL);
ASSYM(VM86_FRAMESIZE, sizeof(struct vm86frame));
ASSYM(VM86_STACK_SPACE, VM86_STACK_SPACE);
ASSYM(PMAP_TRM_MIN_ADDRESS, PMAP_TRM_MIN_ADDRESS);
ASSYM(TRAMP_COPYOUT_SZ, TRAMP_COPYOUT_SZ);
#ifdef HWPMC_HOOKS
ASSYM(PMC_FN_USER_CALLCHAIN, PMC_FN_USER_CALLCHAIN);


@@ -53,14 +53,6 @@
#include "assym.inc"
/*
* XXX
*
* Note: This version greatly munged to avoid various assembler errors
* that may be fixed in newer versions of gas. Perhaps newer versions
* will have more pleasant appearance.
*/
/*
* PTmap is recursive pagemap at top of virtual address space.
* Within PTmap, the page directory can be found (third indirection).
@@ -71,7 +63,7 @@
.set PTDpde,PTD + (PTDPTDI * PDESIZE)
/*
* Compiled KERNBASE location and the kernel load address
* Compiled KERNBASE location and the kernel load address, now identical.
*/
.globl kernbase
.set kernbase,KERNBASE
@@ -90,83 +82,6 @@ tmpstk:
.globl bootinfo
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
.globl KERNend
KERNend: .long 0 /* phys addr end of kernel (just after bss) */
physfree: .long 0 /* phys addr of next free page */
.globl IdlePTD
IdlePTD: .long 0 /* phys addr of kernel PTD */
#if defined(PAE) || defined(PAE_TABLES)
.globl IdlePDPT
IdlePDPT: .long 0 /* phys addr of kernel PDPT */
#endif
.globl KPTmap
KPTmap: .long 0 /* address of kernel page tables */
.globl KPTphys
KPTphys: .long 0 /* phys addr of kernel page tables */
.globl proc0kstack
proc0kstack: .long 0 /* address of proc 0 kstack space */
p0kpa: .long 0 /* phys addr of proc0's STACK */
vm86phystk: .long 0 /* PA of vm86/bios stack */
.globl vm86paddr, vm86pa
vm86paddr: .long 0 /* address of vm86 region */
vm86pa: .long 0 /* phys addr of vm86 region */
/**********************************************************************
*
* Some handy macros
*
*/
#define R(foo) ((foo)-KERNBASE)
#define ALLOCPAGES(foo) \
movl R(physfree), %esi ; \
movl $((foo)*PAGE_SIZE), %eax ; \
addl %esi, %eax ; \
movl %eax, R(physfree) ; \
movl %esi, %edi ; \
movl $((foo)*PAGE_SIZE),%ecx ; \
xorl %eax,%eax ; \
cld ; \
rep ; \
stosb
/*
* fillkpt
* eax = page frame address
* ebx = index into page table
* ecx = how many pages to map
* base = base address of page dir/table
* prot = protection bits
*/
#define fillkpt(base, prot) \
shll $PTESHIFT,%ebx ; \
addl base,%ebx ; \
orl $PG_V,%eax ; \
orl prot,%eax ; \
1: movl %eax,(%ebx) ; \
addl $PAGE_SIZE,%eax ; /* increment physical address */ \
addl $PTESIZE,%ebx ; /* next pte */ \
loop 1b
/*
* fillkptphys(prot)
* eax = physical address
* ecx = how many pages to map
* prot = protection bits
*/
#define fillkptphys(prot) \
movl %eax, %ebx ; \
shrl $PAGE_SHIFT, %ebx ; \
fillkpt(R(KPTphys), prot)
.text
/**********************************************************************
*
@@ -179,6 +94,7 @@ NON_GPROF_ENTRY(btext)
movw $0x1234,0x472
/* Set up a real frame in case the double return in newboot is executed. */
xorl %ebp,%ebp
pushl %ebp
movl %esp, %ebp
@@ -204,8 +120,8 @@ NON_GPROF_ENTRY(btext)
* inactive from now until we switch to new ones, since we don't load any
* more segment registers or permit interrupts until after the switch.
*/
movl $R(end),%ecx
movl $R(edata),%edi
movl $end,%ecx
movl $edata,%edi
subl %edi,%ecx
xorl %eax,%eax
cld
@@ -220,48 +136,10 @@ NON_GPROF_ENTRY(btext)
* the old stack, but it need not be, since recover_bootinfo actually
* returns via the old frame.
*/
movl $R(tmpstk),%esp
movl $tmpstk,%esp
call identify_cpu
call create_pagetables
/*
* If the CPU has support for VME, turn it on.
*/
testl $CPUID_VME, R(cpu_feature)
jz 1f
movl %cr4, %eax
orl $CR4_VME, %eax
movl %eax, %cr4
1:
/* Now enable paging */
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl %eax, %cr3
movl %cr4, %edx
orl $CR4_PAE, %edx
movl %edx, %cr4
#else
movl R(IdlePTD), %eax
movl %eax,%cr3 /* load ptd addr into mmu */
#endif
movl %cr0,%edx /* get control word */
orl $CR0_PE|CR0_PG,%edx /* enable paging */
movl %edx,%cr0 /* and let's page NOW! */
pushl $begin /* jump to high virtualized address */
ret
begin:
/*
* Now running relocated at KERNBASE where the system is linked to run.
*
* Remove the lowest part of the double mapping of low memory to get
* some null pointer checks.
*/
movl $0,PTD
movl %eax,%cr3 /* invalidate TLB */
call pmap_cold
/* set up bootstrap stack */
movl proc0kstack,%eax /* location of in-kernel stack */
@@ -375,7 +253,7 @@ newboot:
cmpl $0,%esi
je 2f /* No kernelname */
movl $MAXPATHLEN,%ecx /* Brute force!!! */
movl $R(kernelname),%edi
movl $kernelname,%edi
cmpb $'/',(%esi) /* Make sure it starts with a slash */
je 1f
movb $'/',(%edi)
@@ -403,7 +281,7 @@ got_bi_size:
* Copy the common part of the bootinfo struct
*/
movl %ebx,%esi
movl $R(bootinfo),%edi
movl $bootinfo,%edi
cmpl $BOOTINFO_SIZE,%ecx
jbe got_common_bi_size
movl $BOOTINFO_SIZE,%ecx
@@ -420,12 +298,12 @@ got_common_bi_size:
movl BI_NFS_DISKLESS(%ebx),%esi
cmpl $0,%esi
je olddiskboot
movl $R(nfs_diskless),%edi
movl $nfs_diskless,%edi
movl $NFSDISKLESS_SIZE,%ecx
cld
rep
movsb
movl $R(nfs_diskless_valid),%edi
movl $nfs_diskless_valid,%edi
movl $1,(%edi)
#endif
#endif
@@ -438,9 +316,9 @@ got_common_bi_size:
*/
olddiskboot:
movl 8(%ebp),%eax
movl %eax,R(boothowto)
movl %eax,boothowto
movl 12(%ebp),%eax
movl %eax,R(bootdev)
movl %eax,bootdev
ret
@@ -478,16 +356,16 @@ identify_cpu:
divl %ecx
jz trynexgen
popfl
movl $CPU_386,R(cpu)
movl $CPU_386,cpu
jmp 3f
trynexgen:
popfl
movl $CPU_NX586,R(cpu)
movl $0x4778654e,R(cpu_vendor) # store vendor string
movl $0x72446e65,R(cpu_vendor+4)
movl $0x6e657669,R(cpu_vendor+8)
movl $0,R(cpu_vendor+12)
movl $CPU_NX586,cpu
movl $0x4778654e,cpu_vendor # store vendor string
movl $0x72446e65,cpu_vendor+4
movl $0x6e657669,cpu_vendor+8
movl $0,cpu_vendor+12
jmp 3f
try486: /* Try to toggle identification flag; does not exist on early 486s. */
@@ -506,7 +384,7 @@ try486: /* Try to toggle identification flag; does not exist on early 486s. */
testl %eax,%eax
jnz trycpuid
movl $CPU_486,R(cpu)
movl $CPU_486,cpu
/*
* Check Cyrix CPU
@@ -533,250 +411,46 @@ trycyrix:
* CPU, we couldn't distinguish it from Cyrix's (including IBM
* brand of Cyrix CPUs).
*/
movl $0x69727943,R(cpu_vendor) # store vendor string
movl $0x736e4978,R(cpu_vendor+4)
movl $0x64616574,R(cpu_vendor+8)
movl $0x69727943,cpu_vendor # store vendor string
movl $0x736e4978,cpu_vendor+4
movl $0x64616574,cpu_vendor+8
jmp 3f
trycpuid: /* Use the `cpuid' instruction. */
xorl %eax,%eax
cpuid # cpuid 0
movl %eax,R(cpu_high) # highest capability
movl %ebx,R(cpu_vendor) # store vendor string
movl %edx,R(cpu_vendor+4)
movl %ecx,R(cpu_vendor+8)
movb $0,R(cpu_vendor+12)
movl %eax,cpu_high # highest capability
movl %ebx,cpu_vendor # store vendor string
movl %edx,cpu_vendor+4
movl %ecx,cpu_vendor+8
movb $0,cpu_vendor+12
movl $1,%eax
cpuid # cpuid 1
movl %eax,R(cpu_id) # store cpu_id
movl %ebx,R(cpu_procinfo) # store cpu_procinfo
movl %edx,R(cpu_feature) # store cpu_feature
movl %ecx,R(cpu_feature2) # store cpu_feature2
movl %eax,cpu_id # store cpu_id
movl %ebx,cpu_procinfo # store cpu_procinfo
movl %edx,cpu_feature # store cpu_feature
movl %ecx,cpu_feature2 # store cpu_feature2
rorl $8,%eax # extract family type
andl $15,%eax
cmpl $5,%eax
jae 1f
/* less than Pentium; must be 486 */
movl $CPU_486,R(cpu)
movl $CPU_486,cpu
jmp 3f
1:
/* a Pentium? */
cmpl $5,%eax
jne 2f
movl $CPU_586,R(cpu)
movl $CPU_586,cpu
jmp 3f
2:
/* Greater than Pentium...call it a Pentium Pro */
movl $CPU_686,R(cpu)
movl $CPU_686,cpu
3:
ret
/**********************************************************************
*
* Create the first page directory and its page tables.
*
*/
create_pagetables:
/* Find end of kernel image (rounded up to a page boundary). */
movl $R(_end),%esi
/* Include symbols, if any. */
movl R(bootinfo+BI_ESYMTAB),%edi
testl %edi,%edi
je over_symalloc
movl %edi,%esi
movl $KERNBASE,%edi
addl %edi,R(bootinfo+BI_SYMTAB)
addl %edi,R(bootinfo+BI_ESYMTAB)
over_symalloc:
/* If we are told where the end of the kernel space is, believe it. */
movl R(bootinfo+BI_KERNEND),%edi
testl %edi,%edi
je no_kernend
movl %edi,%esi
no_kernend:
addl $PDRMASK,%esi /* Play conservative for now, and */
andl $~PDRMASK,%esi /* ... round up to PDR boundary */
movl %esi,R(KERNend) /* save end of kernel */
movl %esi,R(physfree) /* next free page is at end of kernel */
/* Allocate Kernel Page Tables */
ALLOCPAGES(NKPT)
movl %esi,R(KPTphys)
addl $(KERNBASE-(KPTDI<<(PDRSHIFT-PAGE_SHIFT+PTESHIFT))),%esi
movl %esi,R(KPTmap)
/* Allocate Page Table Directory */
#if defined(PAE) || defined(PAE_TABLES)
/* XXX only need 32 bytes (easier for now) */
ALLOCPAGES(1)
movl %esi,R(IdlePDPT)
#endif
ALLOCPAGES(NPGPTD)
movl %esi,R(IdlePTD)
/* Allocate KSTACK */
ALLOCPAGES(TD0_KSTACK_PAGES)
movl %esi,R(p0kpa)
addl $KERNBASE, %esi
movl %esi, R(proc0kstack)
ALLOCPAGES(1) /* vm86/bios stack */
movl %esi,R(vm86phystk)
ALLOCPAGES(3) /* pgtable + ext + IOPAGES */
movl %esi,R(vm86pa)
addl $KERNBASE, %esi
movl %esi, R(vm86paddr)
/*
* Enable PSE and PGE.
*/
#ifndef DISABLE_PSE
testl $CPUID_PSE, R(cpu_feature)
jz 1f
movl $PG_PS, R(pseflag)
movl %cr4, %eax
orl $CR4_PSE, %eax
movl %eax, %cr4
1:
#endif
#ifndef DISABLE_PG_G
testl $CPUID_PGE, R(cpu_feature)
jz 2f
movl $PG_G, R(pgeflag)
movl %cr4, %eax
orl $CR4_PGE, %eax
movl %eax, %cr4
2:
#endif
/*
* Initialize page table pages mapping physical address zero through the
* (physical) end of the kernel. Many of these pages must be reserved,
* and we reserve them all and map them linearly for convenience. We do
* this even if we've enabled PSE above; we'll just switch the corresponding
* kernel PDEs before we turn on paging.
*
* XXX: We waste some pages here in the PSE case!
*
* This and all other page table entries allow read and write access for
* various reasons. Kernel mappings never have any access restrictions.
*/
xorl %eax, %eax
movl R(KERNend),%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys($PG_RW)
/* Map page table pages. */
movl R(KPTphys),%eax
movl $NKPT,%ecx
fillkptphys($PG_RW)
/* Map page directory. */
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl $1, %ecx
fillkptphys($PG_RW)
#endif
movl R(IdlePTD), %eax
movl $NPGPTD, %ecx
fillkptphys($PG_RW)
/* Map proc0's KSTACK in the physical way ... */
movl R(p0kpa), %eax
movl $(TD0_KSTACK_PAGES), %ecx
fillkptphys($PG_RW)
/* Map ISA hole */
movl $ISA_HOLE_START, %eax
movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
fillkptphys($PG_RW)
/* Map space for the vm86 region */
movl R(vm86phystk), %eax
movl $4, %ecx
fillkptphys($PG_RW)
/* Map page 0 into the vm86 page table */
movl $0, %eax
movl $0, %ebx
movl $1, %ecx
fillkpt(R(vm86pa), $PG_RW|PG_U)
/* ...likewise for the ISA hole */
movl $ISA_HOLE_START, %eax
movl $ISA_HOLE_START>>PAGE_SHIFT, %ebx
movl $ISA_HOLE_LENGTH>>PAGE_SHIFT, %ecx
fillkpt(R(vm86pa), $PG_RW|PG_U)
/*
* Create an identity mapping for low physical memory, including the kernel.
* This is only used to map the 2 instructions for jumping to 'begin' in
* locore (we map everything to avoid having to determine where these
* instructions are). ACPI resume will transiently restore the first PDE in
* this mapping (and depend on this PDE's page table created here not being
* destroyed). See pmap_bootstrap() for more details.
*
* Note: There are errata concerning large pages and physical address zero,
* so a PG_PS mapping should not be used for PDE 0. Our double mapping
* avoids this automatically by not using PG_PS for PDE #KPDI so that PAT
* bits can be set at the page level for i/o pages below 1 MB.
*/
movl R(KPTphys), %eax
xorl %ebx, %ebx
movl $NKPT, %ecx
fillkpt(R(IdlePTD), $PG_RW)
/*
* Install PDEs for PTs covering enough kva to bootstrap. Then for the PSE
* case, replace the PDEs whose coverage is strictly within the kernel
* (between KERNLOAD (rounded up) and KERNend) by large-page PDEs.
*/
movl R(KPTphys), %eax
movl $KPTDI, %ebx
movl $NKPT, %ecx
fillkpt(R(IdlePTD), $PG_RW)
cmpl $0,R(pseflag)
je done_pde
movl R(KERNend), %ecx
movl $(KERNLOAD + PDRMASK) & ~PDRMASK, %eax
subl %eax, %ecx
shrl $PDRSHIFT, %ecx
movl $KPTDI + ((KERNLOAD + PDRMASK) >> PDRSHIFT), %ebx
shll $PDESHIFT, %ebx
addl R(IdlePTD), %ebx
orl $(PG_V|PG_RW|PG_PS), %eax
1: movl %eax, (%ebx)
addl $(1 << PDRSHIFT), %eax
addl $PDESIZE, %ebx
loop 1b
done_pde:
/* install a pde recursively mapping page directory as a page table */
movl R(IdlePTD), %eax
movl $PTDPTDI, %ebx
movl $NPGPTD,%ecx
fillkpt(R(IdlePTD), $PG_RW)
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePTD), %eax
xorl %ebx, %ebx
movl $NPGPTD, %ecx
fillkpt(R(IdlePDPT), $0x0)
#endif
ret
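The PSE branch above, rendered as C for readability (a sketch of the deleted logic; names mirror the assembler, nothing here is code from this change):

static void
pse_promote_sketch(pd_entry_t *pd, vm_paddr_t start, vm_paddr_t end)
{
	vm_paddr_t pa;

	/*
	 * Replace the 4K-mapped PDEs lying wholly inside the kernel
	 * image (start rounded up to a 4M boundary, end == KERNend)
	 * with 4M large-page PDEs.
	 */
	for (pa = start; pa < end; pa += NBPDR)
		pd[KPTDI + (pa >> PDRSHIFT)] = pa | PG_V | PG_RW | PG_PS;
}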
#ifdef XENHVM
/* Xen Hypercall page */
.text


@@ -1,6 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-4-Clause
*
* Copyright (c) 2018 The FreeBSD Foundation
* Copyright (c) 1992 Terrence R. Lambert.
* Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
* All rights reserved.
@@ -8,6 +9,9 @@
* This code is derived from software contributed to Berkeley by
* William Jolitz.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -81,9 +85,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rwlock.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#ifdef SMP
#include <sys/smp.h>
#endif
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -128,6 +130,7 @@ __FBSDID("$FreeBSD$");
#include <machine/reg.h>
#include <machine/sigframe.h>
#include <machine/specialreg.h>
#include <machine/sysarch.h>
#include <machine/trap.h>
#include <machine/vm86.h>
#include <x86/init.h>
@@ -152,8 +155,8 @@ __FBSDID("$FreeBSD$");
/* Sanity check for __curthread() */
CTASSERT(offsetof(struct pcpu, pc_curthread) == 0);
extern register_t init386(int first);
extern void dblfault_handler(void);
register_t init386(int first);
void dblfault_handler(void);
static void cpu_startup(void *);
static void fpstate_drop(struct thread *td);
@@ -210,14 +213,18 @@ struct mtx icu_lock;
struct mem_range_softc mem_range_softc;
/* Default init_ops implementation. */
struct init_ops init_ops = {
extern char start_exceptions[], end_exceptions[];
extern struct sysentvec elf32_freebsd_sysvec;
/* Default init_ops implementation. */
struct init_ops init_ops = {
.early_clock_source_init = i8254_init,
.early_delay = i8254_delay,
#ifdef DEV_APIC
.msi_init = msi_init,
#endif
};
};
static void
cpu_startup(dummy)
@@ -1098,6 +1105,32 @@ sys_sigreturn(td, uap)
return (EJUSTRETURN);
}
#ifdef COMPAT_43
static void
setup_priv_lcall_gate(struct proc *p)
{
struct i386_ldt_args uap;
union descriptor desc;
u_int lcall_addr;
bzero(&uap, sizeof(uap));
uap.start = 0;
uap.num = 1;
lcall_addr = p->p_sysent->sv_psstrings - sz_lcall_tramp;
bzero(&desc, sizeof(desc));
desc.sd.sd_type = SDT_MEMERA;
desc.sd.sd_dpl = SEL_UPL;
desc.sd.sd_p = 1;
desc.sd.sd_def32 = 1;
desc.sd.sd_gran = 1;
desc.sd.sd_lolimit = 0xffff;
desc.sd.sd_hilimit = 0xf;
desc.sd.sd_lobase = lcall_addr;
desc.sd.sd_hibase = lcall_addr >> 24;
i386_set_ldt(curthread, &uap, &desc);
}
#endif
/*
* Reset registers to default values on exec.
*/
@@ -1116,10 +1149,16 @@ exec_setregs(struct thread *td, struct image_params *imgp, u_long stack)
load_gs(_udatasel);
mtx_lock_spin(&dt_lock);
if (td->td_proc->p_md.md_ldt)
if (td->td_proc->p_md.md_ldt != NULL)
user_ldt_free(td);
else
mtx_unlock_spin(&dt_lock);
#ifdef COMPAT_43
if (td->td_proc->p_sysent->sv_psstrings !=
elf32_freebsd_sysvec.sv_psstrings)
setup_priv_lcall_gate(td->td_proc);
#endif
/*
* Reset the fs and gs bases. The values from the old address
@@ -1222,18 +1261,22 @@ SYSCTL_STRING(_machdep, OID_AUTO, bootmethod, CTLFLAG_RD, bootmethod, 0,
int _default_ldt;
union descriptor gdt[NGDT * MAXCPU]; /* global descriptor table */
union descriptor ldt[NLDT]; /* local descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
struct region_descriptor r_gdt, r_idt; /* table descriptors */
struct mtx dt_lock; /* lock for GDT and LDT */
static struct i386tss dblfault_tss;
static char dblfault_stack[PAGE_SIZE];
union descriptor gdt0[NGDT]; /* initial global descriptor table */
union descriptor *gdt = gdt0; /* global descriptor table */
extern vm_offset_t proc0kstack;
union descriptor *ldt; /* local descriptor table */
static struct gate_descriptor idt0[NIDT];
struct gate_descriptor *idt = &idt0[0]; /* interrupt descriptor table */
static struct i386tss *dblfault_tss;
static char *dblfault_stack;
static struct i386tss common_tss0;
vm_offset_t proc0kstack;
/*
* software prototypes -- in more palatable form.
@@ -1334,8 +1377,8 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_def32 = 0,
.ssd_gran = 0 },
/* GLDT_SEL 10 LDT Descriptor */
{ .ssd_base = (int) ldt,
.ssd_limit = sizeof(ldt)-1,
{ .ssd_base = 0,
.ssd_limit = sizeof(union descriptor) * NLDT - 1,
.ssd_type = SDT_SYSLDT,
.ssd_dpl = SEL_UPL,
.ssd_p = 1,
@@ -1343,7 +1386,7 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_def32 = 0,
.ssd_gran = 0 },
/* GUSERLDT_SEL 11 User LDT Descriptor per process */
{ .ssd_base = (int) ldt,
{ .ssd_base = 0,
.ssd_limit = (512 * sizeof(union descriptor)-1),
.ssd_type = SDT_SYSLDT,
.ssd_dpl = 0,
@@ -1352,7 +1395,7 @@ struct soft_segment_descriptor gdt_segs[] = {
.ssd_def32 = 0,
.ssd_gran = 0 },
/* GPANIC_SEL 12 Panic Tss Descriptor */
{ .ssd_base = (int) &dblfault_tss,
{ .ssd_base = 0,
.ssd_limit = sizeof(struct i386tss)-1,
.ssd_type = SDT_SYS386TSS,
.ssd_dpl = 0,
@@ -1473,25 +1516,31 @@ static struct soft_segment_descriptor ldt_segs[] = {
.ssd_gran = 1 },
};
uintptr_t setidt_disp;
void
setidt(idx, func, typ, dpl, selec)
int idx;
inthand_t *func;
int typ;
int dpl;
int selec;
setidt(int idx, inthand_t *func, int typ, int dpl, int selec)
{
uintptr_t off;
off = func != NULL ? (uintptr_t)func + setidt_disp : 0;
setidt_nodisp(idx, off, typ, dpl, selec);
}
void
setidt_nodisp(int idx, uintptr_t off, int typ, int dpl, int selec)
{
struct gate_descriptor *ip;
ip = idt + idx;
ip->gd_looffset = (int)func;
ip->gd_looffset = off;
ip->gd_selector = selec;
ip->gd_stkcpy = 0;
ip->gd_xx = 0;
ip->gd_type = typ;
ip->gd_dpl = dpl;
ip->gd_p = 1;
ip->gd_hioffset = ((int)func)>>16 ;
ip->gd_hioffset = ((u_int)off) >> 16 ;
}
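Handlers are linked at addresses inside [start_exceptions, end_exceptions) but execute from their copy in the trampoline region, so a single displacement converts between the two views; schematically (a sketch, not code from this change):

static __inline uintptr_t
idt_tramp_va(uintptr_t linked)		/* for installing into the IDT */
{
	return (linked + setidt_disp);
}

static __inline uintptr_t
idt_linked_va(uintptr_t tramp)		/* for symbolizing, cf. db_show_idt */
{
	return (tramp - setidt_disp);
}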
extern inthand_t
@@ -1506,7 +1555,7 @@ extern inthand_t
#ifdef XENHVM
IDTVEC(xen_intr_upcall),
#endif
IDTVEC(lcall_syscall), IDTVEC(int0x80_syscall);
IDTVEC(int0x80_syscall);
#ifdef DDB
/*
@@ -1517,15 +1566,29 @@ DB_SHOW_COMMAND(idt, db_show_idt)
{
struct gate_descriptor *ip;
int idx;
uintptr_t func;
uintptr_t func, func_trm;
bool trm;
ip = idt;
for (idx = 0; idx < NIDT && !db_pager_quit; idx++) {
func = (ip->gd_hioffset << 16 | ip->gd_looffset);
if (func != (uintptr_t)&IDTVEC(rsvd)) {
db_printf("%3d\t", idx);
db_printsym(func, DB_STGY_PROC);
db_printf("\n");
if (ip->gd_type == SDT_SYSTASKGT) {
db_printf("%3d\t<TASK>\n", idx);
} else {
func = (ip->gd_hioffset << 16 | ip->gd_looffset);
if (func >= PMAP_TRM_MIN_ADDRESS) {
func_trm = func;
func -= setidt_disp;
trm = true;
} else
trm = false;
if (func != (uintptr_t)&IDTVEC(rsvd)) {
db_printf("%3d\t", idx);
db_printsym(func, DB_STGY_PROC);
if (trm)
db_printf(" (trampoline %#x)",
func_trm);
db_printf("\n");
}
}
ip++;
}
@@ -1572,6 +1635,24 @@ DB_SHOW_COMMAND(dbregs, db_show_dbregs)
db_printf("dr6\t0x%08x\n", rdr6());
db_printf("dr7\t0x%08x\n", rdr7());
}
DB_SHOW_COMMAND(frame, db_show_frame)
{
struct trapframe *frame;
frame = have_addr ? (struct trapframe *)addr : curthread->td_frame;
printf("ss %#x esp %#x efl %#x cs %#x eip %#x\n",
frame->tf_ss, frame->tf_esp, frame->tf_eflags, frame->tf_cs,
frame->tf_eip);
printf("err %#x trapno %d\n", frame->tf_err, frame->tf_trapno);
printf("ds %#x es %#x fs %#x\n",
frame->tf_ds, frame->tf_es, frame->tf_fs);
printf("eax %#x ecx %#x edx %#x ebx %#x\n",
frame->tf_eax, frame->tf_ecx, frame->tf_edx, frame->tf_ebx);
printf("ebp %#x esi %#x edi %#x\n",
frame->tf_ebp, frame->tf_esi, frame->tf_edi);
}
#endif
void
@@ -1698,7 +1779,6 @@ add_smap_entries(struct bios_smap *smapbase, vm_paddr_t *physmap,
static void
basemem_setup(void)
{
vm_paddr_t pa;
pt_entry_t *pte;
int i;
@@ -1708,30 +1788,6 @@ basemem_setup(void)
basemem = 640;
}
/*
* XXX if biosbasemem is now < 640, there is a `hole'
* between the end of base memory and the start of
* ISA memory. The hole may be empty or it may
* contain BIOS code or data. Map it read/write so
* that the BIOS can write to it. (Memory from 0 to
* the physical end of the kernel is mapped read-only
* to begin with and then parts of it are remapped.
* The parts that aren't remapped form holes that
* remain read-only and are unused by the kernel.
* The base memory area is below the physical end of
* the kernel and right now forms a read-only hole.
* The part of it from PAGE_SIZE to
* (trunc_page(biosbasemem * 1024) - 1) will be
* remapped and used by the kernel later.)
*
* This code is similar to the code used in
* pmap_mapdev, but since no memory needs to be
* allocated we simply change the mapping.
*/
for (pa = trunc_page(basemem * 1024);
pa < ISA_HOLE_START; pa += PAGE_SIZE)
pmap_kenter(KERNBASE + pa, pa);
/*
* Map pages between basemem and ISA_HOLE_START, if any, r/w into
* the vm86 page table so that vm86 can scribble on them using
@@ -1812,9 +1868,8 @@ getmemsize(int first)
* the kernel page table so we can use it as a buffer. The
* kernel will unmap this page later.
*/
pmap_kenter(KERNBASE + (1 << PAGE_SHIFT), 1 << PAGE_SHIFT);
vmc.npages = 0;
smap = (void *)vm86_addpage(&vmc, 1, KERNBASE + (1 << PAGE_SHIFT));
smap = (void *)vm86_addpage(&vmc, 1, PMAP_MAP_LOW + ptoa(1));
res = vm86_getptr(&vmc, (vm_offset_t)smap, &vmf.vmf_es, &vmf.vmf_di);
KASSERT(res != 0, ("vm86_getptr() failed: address not found"));
@@ -2135,13 +2190,119 @@ i386_kdb_init(void)
#endif
}
static void
fixup_idt(void)
{
struct gate_descriptor *ip;
uintptr_t off;
int x;
for (x = 0; x < NIDT; x++) {
ip = &idt[x];
if (ip->gd_type != SDT_SYS386IGT &&
ip->gd_type != SDT_SYS386TGT)
continue;
off = ip->gd_looffset + (((u_int)ip->gd_hioffset) << 16);
KASSERT(off >= (uintptr_t)start_exceptions &&
off < (uintptr_t)end_exceptions,
("IDT[%d] type %d off %#x", x, ip->gd_type, off));
off += setidt_disp;
MPASS(off >= PMAP_TRM_MIN_ADDRESS &&
off < PMAP_TRM_MAX_ADDRESS);
ip->gd_looffset = off;
ip->gd_hioffset = off >> 16;
}
}
static void
i386_setidt1(void)
{
int x;
/* exceptions */
for (x = 0; x < NIDT; x++)
setidt(x, &IDTVEC(rsvd), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_DE, &IDTVEC(div), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386IGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL,
SEL_KPL));
setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386IGT,
SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_AC, &IDTVEC(align), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall),
SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#ifdef KDTRACE_HOOKS
setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret),
SDT_SYS386IGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
#ifdef XENHVM
setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
#endif
}
static void
i386_setidt2(void)
{
setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
}
#if defined(DEV_ISA) && !defined(DEV_ATPIC)
static void
i386_setidt3(void)
{
setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
}
#endif
register_t
init386(int first)
{
struct gate_descriptor *gdp;
struct region_descriptor r_gdt, r_idt; /* table descriptors */
int gsel_tss, metadata_missing, x, pa;
struct pcpu *pc;
struct xstate_hdr *xhdr;
vm_offset_t addend;
int late_console;
thread0.td_kstack = proc0kstack;
@@ -2153,18 +2314,23 @@ init386(int first)
*/
proc_linkup0(&proc0, &thread0);
metadata_missing = 0;
if (bootinfo.bi_modulep) {
preload_metadata = (caddr_t)bootinfo.bi_modulep + KERNBASE;
preload_bootstrap_relocate(KERNBASE);
metadata_missing = 0;
addend = (vm_paddr_t)bootinfo.bi_modulep < KERNBASE ?
PMAP_MAP_LOW : 0;
preload_metadata = (caddr_t)bootinfo.bi_modulep + addend;
preload_bootstrap_relocate(addend);
} else {
metadata_missing = 1;
}
if (bootinfo.bi_envp != 0)
init_static_kenv((char *)bootinfo.bi_envp + KERNBASE, 0);
else
if (bootinfo.bi_envp != 0) {
addend = (vm_paddr_t)bootinfo.bi_envp < KERNBASE ?
PMAP_MAP_LOW : 0;
init_static_kenv((char *)bootinfo.bi_envp + addend, 0);
} else {
init_static_kenv(NULL, 0);
}
identify_hypervisor();
@@ -2184,21 +2350,21 @@ init386(int first)
pc = &__pcpu[0];
gdt_segs[GPRIV_SEL].ssd_limit = atop(0 - 1);
gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
gdt_segs[GPROC0_SEL].ssd_base = (int)&common_tss0;
for (x = 0; x < NGDT; x++)
ssdtosd(&gdt_segs[x], &gdt[x].sd);
ssdtosd(&gdt_segs[x], &gdt0[x].sd);
r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
r_gdt.rd_base = (int) gdt;
r_gdt.rd_limit = NGDT * sizeof(gdt0[0]) - 1;
r_gdt.rd_base = (int)gdt0;
mtx_init(&dt_lock, "descriptor tables", NULL, MTX_SPIN);
lgdt(&r_gdt);
pcpu_init(pc, 0, sizeof(struct pcpu));
for (pa = first; pa < first + DPCPU_SIZE; pa += PAGE_SIZE)
pmap_kenter(pa + KERNBASE, pa);
dpcpu_init((void *)(first + KERNBASE), 0);
pmap_kenter(pa, pa);
dpcpu_init((void *)first, 0);
first += DPCPU_SIZE;
PCPU_SET(prvspace, pc);
PCPU_SET(curthread, &thread0);
@@ -2215,67 +2381,7 @@ init386(int first)
mutex_init();
mtx_init(&icu_lock, "icu", NULL, MTX_SPIN | MTX_NOWITNESS | MTX_NOPROFILE);
/* make ldt memory segments */
ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
for (x = 0; x < nitems(ldt_segs); x++)
ssdtosd(&ldt_segs[x], &ldt[x].sd);
_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
/* exceptions */
for (x = 0; x < NIDT; x++)
setidt(x, &IDTVEC(rsvd), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_DE, &IDTVEC(div), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_DB, &IDTVEC(dbg), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_NMI, &IDTVEC(nmi), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_BP, &IDTVEC(bpt), SDT_SYS386IGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_OF, &IDTVEC(ofl), SDT_SYS386TGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_BR, &IDTVEC(bnd), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_NM, &IDTVEC(dna), SDT_SYS386TGT, SEL_KPL
, GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_DF, 0, SDT_SYSTASKGT, SEL_KPL, GSEL(GPANIC_SEL, SEL_KPL));
setidt(IDT_FPUGP, &IDTVEC(fpusegm), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_TS, &IDTVEC(tss), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_NP, &IDTVEC(missing), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_SS, &IDTVEC(stk), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_PF, &IDTVEC(page), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_MF, &IDTVEC(fpu), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_AC, &IDTVEC(align), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_MC, &IDTVEC(mchk), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_XF, &IDTVEC(xmm), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_SYSCALL, &IDTVEC(int0x80_syscall), SDT_SYS386TGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
#ifdef KDTRACE_HOOKS
setidt(IDT_DTRACE_RET, &IDTVEC(dtrace_ret), SDT_SYS386TGT, SEL_UPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif
#ifdef XENHVM
setidt(IDT_EVTCHN, &IDTVEC(xen_intr_upcall), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
#endif
i386_setidt1();
r_idt.rd_limit = sizeof(idt0) - 1;
r_idt.rd_base = (int) idt;
@@ -2288,41 +2394,21 @@ init386(int first)
clock_init();
finishidentcpu(); /* Final stage of CPU initialization */
setidt(IDT_UD, &IDTVEC(ill), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_GP, &IDTVEC(prot), SDT_SYS386TGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
i386_setidt2();
initializecpu(); /* Initialize CPU registers */
initializecpucache();
/* pointer to selector slot for %fs/%gs */
PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
dblfault_tss.tss_esp = dblfault_tss.tss_esp0 = dblfault_tss.tss_esp1 =
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#if defined(PAE) || defined(PAE_TABLES)
dblfault_tss.tss_cr3 = (int)IdlePDPT;
#else
dblfault_tss.tss_cr3 = (int)IdlePTD;
#endif
dblfault_tss.tss_eip = (int)dblfault_handler;
dblfault_tss.tss_eflags = PSL_KERNEL;
dblfault_tss.tss_ds = dblfault_tss.tss_es =
dblfault_tss.tss_gs = GSEL(GDATA_SEL, SEL_KPL);
dblfault_tss.tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
dblfault_tss.tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss.tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
/* Initialize the tss (except for the final esp0) early for vm86. */
PCPU_SET(common_tss.tss_esp0, thread0.td_kstack +
thread0.td_kstack_pages * PAGE_SIZE - 16);
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
common_tss0.tss_esp0 = thread0.td_kstack + thread0.td_kstack_pages *
PAGE_SIZE - VM86_STACK_SPACE;
common_tss0.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
common_tss0.tss_ioopt = sizeof(struct i386tss) << 16;
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
ltr(gsel_tss);
/* Initialize the PIC early for vm86 calls. */
@@ -2338,10 +2424,7 @@ init386(int first)
* Point the ICU spurious interrupt vectors at the APIC spurious
* interrupt handler.
*/
setidt(IDT_IO_INTS + 7, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
setidt(IDT_IO_INTS + 15, IDTVEC(spuriousint), SDT_SYS386IGT, SEL_KPL,
GSEL(GCODE_SEL, SEL_KPL));
i386_setidt3();
#endif
#endif
@@ -2391,22 +2474,11 @@ init386(int first)
PCPU_SET(curpcb, thread0.td_pcb);
/* Move esp0 in the tss to its final place. */
/* Note: -16 is so we can grow the trapframe if we came from vm86 */
PCPU_SET(common_tss.tss_esp0, (vm_offset_t)thread0.td_pcb - 16);
common_tss0.tss_esp0 = (vm_offset_t)thread0.td_pcb - VM86_STACK_SPACE;
PCPU_SET(kesp0, common_tss0.tss_esp0);
gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS; /* clear busy bit */
ltr(gsel_tss);
/* make a call gate to reenter kernel with */
gdp = &ldt[LSYS5CALLS_SEL].gd;
x = (int) &IDTVEC(lcall_syscall);
gdp->gd_looffset = x;
gdp->gd_selector = GSEL(GCODE_SEL,SEL_KPL);
gdp->gd_stkcpy = 1;
gdp->gd_type = SDT_SYS386CGT;
gdp->gd_dpl = SEL_UPL;
gdp->gd_p = 1;
gdp->gd_hioffset = x >> 16;
/* transfer to user mode */
_ucodesel = GSEL(GUCODE_SEL, SEL_UPL);
@@ -2432,6 +2504,133 @@ init386(int first)
return ((register_t)thread0.td_pcb);
}
extern u_int tramp_idleptd;
static void
machdep_init_trampoline(void)
{
struct region_descriptor r_gdt, r_idt;
struct i386tss *tss;
char *copyout_buf, *trampoline, *tramp_stack_base;
u_int *tramp_idleptd_reloced;
int x;
gdt = pmap_trm_alloc(sizeof(union descriptor) * NGDT * mp_ncpus,
M_NOWAIT | M_ZERO);
bcopy(gdt0, gdt, sizeof(union descriptor) * NGDT);
r_gdt.rd_limit = NGDT * sizeof(gdt[0]) - 1;
r_gdt.rd_base = (int)gdt;
lgdt(&r_gdt);
tss = pmap_trm_alloc(sizeof(struct i386tss) * mp_ncpus,
M_NOWAIT | M_ZERO);
bcopy(&common_tss0, tss, sizeof(struct i386tss));
gdt[GPROC0_SEL].sd.sd_lobase = (int)tss;
gdt[GPROC0_SEL].sd.sd_hibase = (u_int)tss >> 24;
gdt[GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
ltr(GSEL(GPROC0_SEL, SEL_KPL));
PCPU_SET(fsgs_gdt, &gdt[GUFS_SEL].sd);
PCPU_SET(tss_gdt, &gdt[GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
PCPU_SET(common_tssp, tss);
trampoline = pmap_trm_alloc(end_exceptions - start_exceptions,
M_NOWAIT);
bcopy(start_exceptions, trampoline, end_exceptions - start_exceptions);
tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
PCPU_SET(trampstk, (uintptr_t)tramp_stack_base + TRAMP_STACK_SZ -
VM86_STACK_SPACE);
tss[0].tss_esp0 = PCPU_GET(trampstk);
idt = pmap_trm_alloc(sizeof(idt0), M_NOWAIT | M_ZERO);
bcopy(idt0, idt, sizeof(idt0));
/* Re-initialize new IDT since the handlers were relocated */
setidt_disp = trampoline - start_exceptions;
fixup_idt();
tramp_idleptd_reloced = (u_int *)((uintptr_t)&tramp_idleptd +
setidt_disp);
#if defined(PAE) || defined(PAE_TABLES)
*tramp_idleptd_reloced = (u_int)IdlePDPT;
#else
*tramp_idleptd_reloced = (u_int)IdlePTD;
#endif
r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
r_idt.rd_base = (int)idt;
lidt(&r_idt);
/* dblfault TSS */
dblfault_tss = pmap_trm_alloc(sizeof(struct i386tss), M_NOWAIT | M_ZERO);
dblfault_stack = pmap_trm_alloc(PAGE_SIZE, M_NOWAIT);
dblfault_tss->tss_esp = dblfault_tss->tss_esp0 =
dblfault_tss->tss_esp1 = dblfault_tss->tss_esp2 =
(int)dblfault_stack + PAGE_SIZE;
dblfault_tss->tss_ss = dblfault_tss->tss_ss0 = dblfault_tss->tss_ss1 =
dblfault_tss->tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
#if defined(PAE) || defined(PAE_TABLES)
dblfault_tss->tss_cr3 = (int)IdlePDPT;
#else
dblfault_tss->tss_cr3 = (int)IdlePTD;
#endif
dblfault_tss->tss_eip = (int)dblfault_handler;
dblfault_tss->tss_eflags = PSL_KERNEL;
dblfault_tss->tss_ds = dblfault_tss->tss_es =
dblfault_tss->tss_gs = GSEL(GDATA_SEL, SEL_KPL);
dblfault_tss->tss_fs = GSEL(GPRIV_SEL, SEL_KPL);
dblfault_tss->tss_cs = GSEL(GCODE_SEL, SEL_KPL);
dblfault_tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
gdt[GPANIC_SEL].sd.sd_lobase = (int)dblfault_tss;
gdt[GPANIC_SEL].sd.sd_hibase = (u_int)dblfault_tss >> 24;
/* make ldt memory segments */
ldt = pmap_trm_alloc(sizeof(union descriptor) * NLDT,
M_NOWAIT | M_ZERO);
gdt[GLDT_SEL].sd.sd_lobase = (int)ldt;
gdt[GLDT_SEL].sd.sd_hibase = (u_int)ldt >> 24;
ldt_segs[LUCODE_SEL].ssd_limit = atop(0 - 1);
ldt_segs[LUDATA_SEL].ssd_limit = atop(0 - 1);
for (x = 0; x < nitems(ldt_segs); x++)
ssdtosd(&ldt_segs[x], &ldt[x].sd);
_default_ldt = GSEL(GLDT_SEL, SEL_KPL);
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
PCPU_SET(copyout_buf, copyout_buf);
copyout_init_tramp();
}
SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_SECOND, machdep_init_trampoline, NULL);
#ifdef COMPAT_43
static void
i386_setup_lcall_gate(void)
{
struct sysentvec *sv;
struct user_segment_descriptor desc;
u_int lcall_addr;
sv = &elf32_freebsd_sysvec;
lcall_addr = (uintptr_t)sv->sv_psstrings - sz_lcall_tramp;
bzero(&desc, sizeof(desc));
desc.sd_type = SDT_MEMERA;
desc.sd_dpl = SEL_UPL;
desc.sd_p = 1;
desc.sd_def32 = 1;
desc.sd_gran = 1;
desc.sd_lolimit = 0xffff;
desc.sd_hilimit = 0xf;
desc.sd_lobase = lcall_addr;
desc.sd_hibase = lcall_addr >> 24;
bcopy(&desc, &ldt[LSYS5CALLS_SEL], sizeof(desc));
}
SYSINIT(elf32, SI_SUB_EXEC, SI_ORDER_ANY, i386_setup_lcall_gate, NULL);
#endif
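Note the descriptor packing: the assignment to sd_lobase relies on the bitfield truncating to the low 24 bits, with sd_hibase holding the remaining 8. Spelled out (a sketch, not code from this change):

static void
sd_set_base_sketch(struct user_segment_descriptor *sd, u_int base)
{
	sd->sd_lobase = base & 0xffffff;	/* low 24 bits */
	sd->sd_hibase = base >> 24;		/* high 8 bits */
}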
void
cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
{
@@ -2512,6 +2711,7 @@ SYSINIT(f00f_hack, SI_SUB_INTRINSIC, SI_ORDER_FIRST, f00f_hack, NULL);
static void
f00f_hack(void *unused)
{
struct region_descriptor r_idt;
struct gate_descriptor *new_idt;
vm_offset_t tmp;
@@ -2522,16 +2722,19 @@ f00f_hack(void *unused)
printf("Intel Pentium detected, installing workaround for F00F bug\n");
tmp = kmem_malloc(kernel_arena, PAGE_SIZE * 2, M_WAITOK | M_ZERO);
tmp = (vm_offset_t)pmap_trm_alloc(PAGE_SIZE * 3, M_NOWAIT | M_ZERO);
if (tmp == 0)
panic("kmem_malloc returned 0");
tmp = round_page(tmp);
/* Put the problematic entry (#6) at the end of the lower page. */
new_idt = (struct gate_descriptor*)
new_idt = (struct gate_descriptor *)
(tmp + PAGE_SIZE - 7 * sizeof(struct gate_descriptor));
bcopy(idt, new_idt, sizeof(idt0));
r_idt.rd_base = (u_int)new_idt;
r_idt.rd_limit = sizeof(idt0) - 1;
lidt(&r_idt);
/* SMP machines do not need the F00F hack. */
idt = new_idt;
pmap_protect(kernel_pmap, tmp, tmp + PAGE_SIZE, VM_PROT_READ);
}


@@ -92,9 +92,6 @@ memrw(struct cdev *dev, struct uio *uio, int flags)
return EIO;
if (dev2unit(dev) == CDEV_MINOR_KMEM && uio->uio_resid > 0) {
if (uio->uio_offset < (vm_offset_t)VADDR(PTDPTDI, 0))
return (EFAULT);
if (!kernacc((caddr_t)(int)uio->uio_offset, uio->uio_resid,
uio->uio_rw == UIO_READ ? VM_PROT_READ : VM_PROT_WRITE))
return (EFAULT);


@@ -190,7 +190,7 @@ minidumpsys(struct dumperinfo *di)
* page written corresponds to 2MB of space
*/
ptesize += PAGE_SIZE;
pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */
pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is an entire 2M page. */
@@ -281,7 +281,7 @@ minidumpsys(struct dumperinfo *di)
/* Dump kernel page table pages */
for (va = KERNBASE; va < kernel_vm_end; va += NBPDR) {
/* We always write a page, even if it is zero */
pd = (pd_entry_t *)((uintptr_t)IdlePTD + KERNBASE); /* always mapped! */
pd = IdlePTD; /* always mapped! */
j = va >> PDRSHIFT;
if ((pd[j] & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
/* This is a single 2M block. Generate a fake PTP */
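Both minidump loops gate on the same PDE test; as a C helper it would read (sketch only):

static bool
pde_is_superpage_sketch(pd_entry_t *pd, vm_offset_t va)
{
	/* A valid superpage mapping has both PG_V and PG_PS set. */
	return ((pd[va >> PDRSHIFT] & (PG_PS | PG_V)) == (PG_PS | PG_V));
}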


@@ -83,8 +83,8 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#define WARMBOOT_TARGET 0
#define WARMBOOT_OFF (KERNBASE + 0x0467)
#define WARMBOOT_SEG (KERNBASE + 0x0469)
#define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467)
#define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469)
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
@@ -139,6 +139,8 @@ static void install_ap_tramp(void);
static int start_all_aps(void);
static int start_ap(int apic_id);
static char *ap_copyout_buf;
static char *ap_tramp_stack_base;
/*
* Initialize the IPI handlers and start up the AP's.
*/
@@ -207,10 +209,10 @@
init_secondary(void)
{
struct pcpu *pc;
vm_offset_t addr;
int gsel_tss;
int x, myid;
u_int cr0;
struct i386tss *common_tssp;
struct region_descriptor r_gdt, r_idt;
int gsel_tss, myid, x;
u_int cr0;
/* bootAP is set in start_ap() to our ID. */
myid = bootAP;
@@ -224,11 +226,13 @@ init_secondary(void)
pc->pc_apic_id = cpu_apic_ids[myid];
pc->pc_prvspace = pc;
pc->pc_curthread = 0;
pc->pc_common_tssp = common_tssp = &(__pcpu[0].pc_common_tssp)[myid];
fix_cpuid();
gdt_segs[GPRIV_SEL].ssd_base = (int) pc;
gdt_segs[GPROC0_SEL].ssd_base = (int) &pc->pc_common_tss;
gdt_segs[GPRIV_SEL].ssd_base = (int)pc;
gdt_segs[GPROC0_SEL].ssd_base = (int)common_tssp;
gdt_segs[GLDT_SEL].ssd_base = (int)ldt;
for (x = 0; x < NGDT; x++) {
ssdtosd(&gdt_segs[x], &gdt[myid * NGDT + x].sd);
@@ -238,21 +242,27 @@ init_secondary(void)
r_gdt.rd_base = (int) &gdt[myid * NGDT];
lgdt(&r_gdt); /* does magic intra-segment return */
r_idt.rd_limit = sizeof(struct gate_descriptor) * NIDT - 1;
r_idt.rd_base = (int)idt;
lidt(&r_idt);
lldt(_default_ldt);
PCPU_SET(currentldt, _default_ldt);
PCPU_SET(trampstk, (uintptr_t)ap_tramp_stack_base + TRAMP_STACK_SZ -
VM86_STACK_SPACE);
gsel_tss = GSEL(GPROC0_SEL, SEL_KPL);
gdt[myid * NGDT + GPROC0_SEL].sd.sd_type = SDT_SYS386TSS;
PCPU_SET(common_tss.tss_esp0, 0); /* not used until after switch */
PCPU_SET(common_tss.tss_ss0, GSEL(GDATA_SEL, SEL_KPL));
PCPU_SET(common_tss.tss_ioopt, (sizeof (struct i386tss)) << 16);
common_tssp->tss_esp0 = PCPU_GET(trampstk);
common_tssp->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
common_tssp->tss_ioopt = sizeof(struct i386tss) << 16;
PCPU_SET(tss_gdt, &gdt[myid * NGDT + GPROC0_SEL].sd);
PCPU_SET(common_tssd, *PCPU_GET(tss_gdt));
ltr(gsel_tss);
PCPU_SET(fsgs_gdt, &gdt[myid * NGDT + GUFS_SEL].sd);
PCPU_SET(copyout_buf, ap_copyout_buf);
/*
* Set to a known state:
@@ -274,8 +284,6 @@ init_secondary(void)
/* BSP may have changed PTD while we were waiting */
invltlb();
for (addr = 0; addr < NKPT * NBPDR - 1; addr += PAGE_SIZE)
invlpg(addr);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
lidt(&r_idt);
@@ -287,17 +295,20 @@ init_secondary(void)
/*
* start each AP in our list
*/
/* Lowest 1MB is already mapped: don't touch*/
#define TMPMAP_START 1
static int
start_all_aps(void)
{
u_char mpbiosreason;
u_int32_t mpbioswarmvec;
int apic_id, cpu, i;
int apic_id, cpu;
mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
/* Remap lowest 1MB */
IdlePTD[0] = IdlePTD[1];
load_cr3(rcr3()); /* invalidate TLB */
/* install the AP 1st level boot code */
install_ap_tramp();
@@ -306,11 +317,7 @@ start_all_aps(void)
outb(CMOS_REG, BIOS_RESET);
mpbiosreason = inb(CMOS_DATA);
/* set up temporary P==V mapping for AP boot */
/* XXX this is a hack, we should boot the AP on its own stack/PTD */
for (i = TMPMAP_START; i < NKPT; i++)
PTD[i] = PTD[KPTDI + i];
invltlb();
/* take advantage of the P==V mapping for PTD[0] for AP boot */
/* start each AP */
for (cpu = 1; cpu < mp_ncpus; cpu++) {
@@ -332,6 +339,9 @@ start_all_aps(void)
PAGE_SIZE - 4;
bootAP = cpu;
ap_tramp_stack_base = pmap_trm_alloc(TRAMP_STACK_SZ, M_NOWAIT);
ap_copyout_buf = pmap_trm_alloc(TRAMP_COPYOUT_SZ, M_NOWAIT);
/* attempt to start the Application Processor */
CHECK_INIT(99); /* setup checkpoints */
if (!start_ap(apic_id)) {
@@ -347,17 +357,16 @@ start_all_aps(void)
CPU_SET(cpu, &all_cpus); /* record AP in CPU map */
}
/* Unmap lowest 1MB again */
IdlePTD[0] = 0;
load_cr3(rcr3());
/* restore the warmstart vector */
*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
outb(CMOS_REG, BIOS_RESET);
outb(CMOS_DATA, mpbiosreason);
/* Undo V==P hack from above */
for (i = TMPMAP_START; i < NKPT; i++)
PTD[i] = 0;
pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
/* number of APs actually started */
return mp_naps;
}
@@ -379,7 +388,7 @@ install_ap_tramp(void)
{
int x;
int size = *(int *) ((u_long) & bootMP_size);
vm_offset_t va = boot_address + KERNBASE;
vm_offset_t va = boot_address;
u_char *src = (u_char *) ((u_long) bootMP);
u_char *dst = (u_char *) va;
u_int boot_base = (u_int) bootMP;
@@ -409,7 +418,7 @@ install_ap_tramp(void)
/* modify the ljmp target for MPentry() */
dst32 = (u_int32_t *) (dst + ((u_int) bigJump - boot_base) + 1);
*dst32 = ((u_int) MPentry - KERNBASE);
*dst32 = (u_int)MPentry;
/* modify the target for boot code segment */
dst16 = (u_int16_t *) (dst + ((u_int) bootCodeSeg - boot_base));


@@ -37,8 +37,6 @@
#include "assym.inc"
#define R(x) ((x)-KERNBASE)
/*
* this code MUST be enabled here and in mp_machdep.c
* it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -80,18 +78,14 @@ NON_GPROF_ENTRY(MPentry)
movl $1,%eax
cpuid /* Retrieve features */
movl %cr4,%eax
#ifndef DISABLE_PSE
testl $CPUID_PSE,%edx
jz 1f
orl $CR4_PSE,%eax /* Enable PSE */
1:
#endif
#ifndef DISABLE_PG_G
testl $CPUID_PGE,%edx
jz 1f
orl $CR4_PGE,%eax /* Enable PGE */
1:
#endif
testl $CPUID_VME,%edx
jz 1f
orl $CR4_VME,%eax /* Enable VME */
@@ -100,13 +94,13 @@ NON_GPROF_ENTRY(MPentry)
/* Now enable paging mode */
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl IdlePDPT, %eax
movl %eax, %cr3
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
#else
movl R(IdlePTD), %eax
movl IdlePTD, %eax
movl %eax,%cr3
#endif
movl %cr0,%eax

File diff suppressed because it is too large.


@@ -95,6 +95,25 @@ osigcode:
pushl %eax /* junk to fake return addr. */
int $0x80 /* enter kernel with args */
0: jmp 0b
/*
* Our lcall $7,$0 handler remains in user mode (ring 3), since lcalls
* don't change the interrupt mask, so if this one went directly to the
* kernel then there would be a window with interrupts enabled in kernel
* mode, and all interrupt handlers would have to be almost as complicated
* as the NMI handler to support this.
*
* Instead, convert the lcall to an int0x80 call. The kernel does most
* of the conversion by popping the lcall return values off the user
* stack and returning to them instead of to here, except when the
* conversion itself fails. Adjusting the stack here is impossible for
* vfork() and harder for other syscalls.
*/
ALIGN_TEXT
lcall_tramp:
int $0x80
1: jmp 1b
#endif /* COMPAT_43 */
ALIGN_TEXT
@@ -113,4 +132,7 @@ szfreebsd4_sigcode:
.globl szosigcode
szosigcode:
.long esigcode-osigcode
.globl sz_lcall_tramp
sz_lcall_tramp:
.long esigcode-lcall_tramp
#endif


@@ -251,196 +251,6 @@ ENTRY(memcpy)
ret
END(memcpy)
/*****************************************************************************/
/* copyout and fubyte family */
/*****************************************************************************/
/*
* Access user memory from inside the kernel. These routines and possibly
* the math- and DOS emulators should be the only places that do this.
*
* We have to access the memory with user's permissions, so use a segment
* selector with RPL 3. For writes to user space we have to additionally
* check the PTE for write permission, because the 386 does not check
* write permissions when we are executing with EPL 0. The 486 does check
* this if the WP bit is set in CR0, so we can use a simpler version here.
*
* These routines set curpcb->pcb_onfault for the time they execute. When a
* protection violation occurs inside the functions, the trap handler
* returns to *curpcb->pcb_onfault instead of the function.
*/
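In C, the address validation described above reduces to the following sketch (fault recovery via pcb_onfault elided; the deleted assembler below is the authoritative version):

static int
user_range_ok_sketch(uintptr_t uva, size_t len)
{
	uintptr_t end;

	end = uva + len;
	if (end < uva)		/* address arithmetic wrapped */
		return (0);
	/* VM_MAXUSER_ADDRESS is an end address, hence <= and not <. */
	return (end <= VM_MAXUSER_ADDRESS);
}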
/*
* copyout(from_kernel, to_user, len) - MP SAFE
*/
ENTRY(copyout)
movl PCPU(CURPCB),%eax
movl $copyout_fault,PCB_ONFAULT(%eax)
pushl %esi
pushl %edi
pushl %ebx
movl 16(%esp),%esi
movl 20(%esp),%edi
movl 24(%esp),%ebx
testl %ebx,%ebx /* anything to do? */
jz done_copyout
/*
* Check explicitly for non-user addresses. This check is essential
* because it prevents usermode from writing into the kernel. We do
* not verify anywhere else that the user did not specify a rogue
* address.
*/
/*
* First, prevent address wrapping.
*/
movl %edi,%eax
addl %ebx,%eax
jc copyout_fault
/*
* XXX STOP USING VM_MAXUSER_ADDRESS.
* It is an end address, not a max, so every time it is used correctly it
* looks like there is an off by one error, and of course it caused an off
* by one error in several places.
*/
cmpl $VM_MAXUSER_ADDRESS,%eax
ja copyout_fault
/* bcopy(%esi, %edi, %ebx) */
movl %ebx,%ecx
shrl $2,%ecx
rep
movsl
movb %bl,%cl
andb $3,%cl
rep
movsb
done_copyout:
popl %ebx
popl %edi
popl %esi
xorl %eax,%eax
movl PCPU(CURPCB),%edx
movl %eax,PCB_ONFAULT(%edx)
ret
END(copyout)
ALIGN_TEXT
copyout_fault:
popl %ebx
popl %edi
popl %esi
movl PCPU(CURPCB),%edx
movl $0,PCB_ONFAULT(%edx)
movl $EFAULT,%eax
ret
/*
* copyin(from_user, to_kernel, len) - MP SAFE
*/
ENTRY(copyin)
movl PCPU(CURPCB),%eax
movl $copyin_fault,PCB_ONFAULT(%eax)
pushl %esi
pushl %edi
movl 12(%esp),%esi /* caddr_t from */
movl 16(%esp),%edi /* caddr_t to */
movl 20(%esp),%ecx /* size_t len */
/*
* make sure address is valid
*/
movl %esi,%edx
addl %ecx,%edx
jc copyin_fault
cmpl $VM_MAXUSER_ADDRESS,%edx
ja copyin_fault
movb %cl,%al
shrl $2,%ecx /* copy longword-wise */
rep
movsl
movb %al,%cl
andb $3,%cl /* copy remaining bytes */
rep
movsb
popl %edi
popl %esi
xorl %eax,%eax
movl PCPU(CURPCB),%edx
movl %eax,PCB_ONFAULT(%edx)
ret
END(copyin)
ALIGN_TEXT
copyin_fault:
popl %edi
popl %esi
movl PCPU(CURPCB),%edx
movl $0,PCB_ONFAULT(%edx)
movl $EFAULT,%eax
ret
/*
* casueword. Compare and set user word. Returns -1 on fault,
* 0 on non-faulting access. The current value is in *oldp.
*/
ALTENTRY(casueword32)
ENTRY(casueword)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx /* dst */
movl 8(%esp),%eax /* old */
movl 16(%esp),%ecx /* new */
cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
ja fusufault
#ifdef SMP
lock
#endif
cmpxchgl %ecx,(%edx) /* Compare and set. */
/*
* The old value is in %eax. If the store succeeded it will be the
* value we expected (old) from before the store, otherwise it will
* be the current value.
*/
movl PCPU(CURPCB),%ecx
movl $0,PCB_ONFAULT(%ecx)
movl 12(%esp),%edx /* oldp */
movl %eax,(%edx)
xorl %eax,%eax
ret
END(casueword32)
END(casueword)
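casueword()'s contract in C, with atomic_fcmpset_32() standing in for the lock cmpxchg and the pcb_onfault fault path elided (sketch only):

static int
casueword_sketch(volatile uint32_t *uaddr, uint32_t old, uint32_t *oldp,
    uint32_t new)
{
	uint32_t cur;

	cur = old;
	/*
	 * On failure, cur is updated to the current value, just as
	 * cmpxchg leaves it in %eax.
	 */
	atomic_fcmpset_32(uaddr, &cur, new);
	*oldp = cur;
	return (0);		/* -1 would signal a fault */
}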
/*
* Fetch (load) a 32-bit word, a 16-bit word, or an 8-bit byte from user
* memory.
*/
ALTENTRY(fueword32)
ENTRY(fueword)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx /* from */
cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address is valid */
ja fusufault
movl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
movl 8(%esp),%edx
movl %eax,(%edx)
xorl %eax,%eax
ret
END(fueword32)
END(fueword)
/*
* fuswintr() and suswintr() are specialized variants of fuword16() and
* suword16(), respectively. They are called from the profiling code,
@@ -455,167 +265,6 @@ ENTRY(fuswintr)
END(suswintr)
END(fuswintr)
ENTRY(fuword16)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx
cmpl $VM_MAXUSER_ADDRESS-2,%edx
ja fusufault
movzwl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
ret
END(fuword16)
ENTRY(fubyte)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx
cmpl $VM_MAXUSER_ADDRESS-1,%edx
ja fusufault
movzbl (%edx),%eax
movl $0,PCB_ONFAULT(%ecx)
ret
END(fubyte)
ALIGN_TEXT
fusufault:
movl PCPU(CURPCB),%ecx
xorl %eax,%eax
movl %eax,PCB_ONFAULT(%ecx)
decl %eax
ret
/*
* Store a 32-bit word, a 16-bit word, or an 8-bit byte to user memory.
* All these functions are MPSAFE.
*/
ALTENTRY(suword32)
ENTRY(suword)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx
cmpl $VM_MAXUSER_ADDRESS-4,%edx /* verify address validity */
ja fusufault
movl 8(%esp),%eax
movl %eax,(%edx)
xorl %eax,%eax
movl PCPU(CURPCB),%ecx
movl %eax,PCB_ONFAULT(%ecx)
ret
END(suword32)
END(suword)
ENTRY(suword16)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx
cmpl $VM_MAXUSER_ADDRESS-2,%edx /* verify address validity */
ja fusufault
movw 8(%esp),%ax
movw %ax,(%edx)
xorl %eax,%eax
movl PCPU(CURPCB),%ecx /* restore trashed register */
movl %eax,PCB_ONFAULT(%ecx)
ret
END(suword16)
ENTRY(subyte)
movl PCPU(CURPCB),%ecx
movl $fusufault,PCB_ONFAULT(%ecx)
movl 4(%esp),%edx
cmpl $VM_MAXUSER_ADDRESS-1,%edx /* verify address validity */
ja fusufault
movb 8(%esp),%al
movb %al,(%edx)
xorl %eax,%eax
movl PCPU(CURPCB),%ecx /* restore trashed register */
movl %eax,PCB_ONFAULT(%ecx)
ret
END(subyte)
/*
* copyinstr(from, to, maxlen, int *lencopied) - MP SAFE
*
* copy a string from 'from' to 'to', stop when a 0 character is reached.
* return ENAMETOOLONG if string is longer than maxlen, and
* EFAULT on protection violations. If lencopied is non-zero,
* return the actual length in *lencopied.
*/
ENTRY(copyinstr)
pushl %esi
pushl %edi
movl PCPU(CURPCB),%ecx
movl $cpystrflt,PCB_ONFAULT(%ecx)
movl 12(%esp),%esi /* %esi = from */
movl 16(%esp),%edi /* %edi = to */
movl 20(%esp),%edx /* %edx = maxlen */
movl $VM_MAXUSER_ADDRESS,%eax
/* make sure 'from' is within bounds */
subl %esi,%eax
jbe cpystrflt
/* restrict maxlen to <= VM_MAXUSER_ADDRESS-from */
cmpl %edx,%eax
jae 1f
movl %eax,%edx
movl %eax,20(%esp)
1:
incl %edx
2:
decl %edx
jz 3f
lodsb
stosb
orb %al,%al
jnz 2b
/* Success -- 0 byte reached */
decl %edx
xorl %eax,%eax
jmp cpystrflt_x
3:
/* edx is zero - return ENAMETOOLONG or EFAULT */
cmpl $VM_MAXUSER_ADDRESS,%esi
jae cpystrflt
4:
movl $ENAMETOOLONG,%eax
jmp cpystrflt_x
cpystrflt:
movl $EFAULT,%eax
cpystrflt_x:
/* set *lencopied and return %eax */
movl PCPU(CURPCB),%ecx
movl $0,PCB_ONFAULT(%ecx)
movl 20(%esp),%ecx
subl %edx,%ecx
movl 24(%esp),%edx
testl %edx,%edx
jz 1f
movl %ecx,(%edx)
1:
popl %edi
popl %esi
ret
END(copyinstr)
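The deleted loop's semantics in C (sketch; pcb_onfault-based fault recovery elided; note that the terminating NUL counts toward *lencopied):

static int
copyinstr_sketch(const char *uaddr, char *kaddr, size_t maxlen,
    size_t *lencopied)
{
	size_t i;

	for (i = 0; i < maxlen; i++) {
		if ((uintptr_t)&uaddr[i] >= VM_MAXUSER_ADDRESS) {
			if (lencopied != NULL)
				*lencopied = i;
			return (EFAULT);
		}
		if ((kaddr[i] = uaddr[i]) == '\0') {
			if (lencopied != NULL)
				*lencopied = i + 1;
			return (0);
		}
	}
	if (lencopied != NULL)
		*lencopied = maxlen;
	return (ENAMETOOLONG);
}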
/*
* copystr(from, to, maxlen, int *lencopied) - MP SAFE
*/


@@ -86,8 +86,6 @@ ENTRY(cpu_throw)
1:
movl 8(%esp),%ecx /* New thread */
movl TD_PCB(%ecx),%edx
movl PCB_CR3(%edx),%eax
movl %eax,%cr3
/* set bit in new pm_active */
movl TD_PROC(%ecx),%eax
movl P_VMSPACE(%eax), %ebx
@@ -157,7 +155,7 @@ ENTRY(cpu_switch)
popl %eax
1:
/* Save is done. Now fire up new thread. Leave old vmspace. */
/* Save is done. Now fire up new thread. */
movl 4(%esp),%edi
movl 8(%esp),%ecx /* New thread */
movl 12(%esp),%esi /* New lock */
@@ -167,15 +165,10 @@ ENTRY(cpu_switch)
#endif
movl TD_PCB(%ecx),%edx
/* switch address space */
movl PCB_CR3(%edx),%eax
movl %cr3,%ebx /* The same address space? */
cmpl %ebx,%eax
je sw0
movl %eax,%cr3 /* new address space */
/* Switchout td_lock */
movl %esi,%eax
movl PCPU(CPUID),%esi
SETOP %eax,TD_LOCK(%edi) /* Switchout td_lock */
SETOP %eax,TD_LOCK(%edi)
/* Release bit from old pmap->pm_active */
movl PCPU(CURPMAP), %ebx
@@ -200,26 +193,28 @@ sw0:
sw1:
BLOCK_SPIN(%ecx)
/*
* At this point, we've switched address spaces and are ready
* At this point, we have managed thread locks and are ready
* to load up the rest of the next context.
*/
/* Load a pointer to the thread kernel stack into PCPU. */
leal -VM86_STACK_SPACE(%edx), %eax /* leave space for vm86 */
movl %eax, PCPU(KESP0)
cmpl $0, PCB_EXT(%edx) /* has pcb extension? */
je 1f /* If not, use the default */
movl $1, PCPU(PRIVATE_TSS) /* mark use of private tss */
movl PCB_EXT(%edx), %edi /* new tss descriptor */
movl PCPU(TRAMPSTK), %ebx
movl %ebx, PCB_EXT_TSS+TSS_ESP0(%edi)
jmp 2f /* Load it up */
1: /*
* Use the common default TSS instead of our own.
* Set our stack pointer into the TSS, it's set to just
* below the PCB. In C, common_tss.tss_esp0 = &pcb - 16;
*/
leal -16(%edx), %ebx /* leave space for vm86 */
movl %ebx, PCPU(COMMON_TSS) + TSS_ESP0
/*
* Test this CPU's bit in the bitmap to see if this
* CPU was using a private TSS.
* Stack pointer in the common TSS points to the trampoline stack
* already and should not be changed.
*
* Test this CPU's flag to see if this CPU was using a private TSS.
*/
cmpl $0, PCPU(PRIVATE_TSS) /* Already using the common? */
je 3f /* if so, skip reloading */


@@ -294,10 +294,8 @@ i386_extend_pcb(struct thread *td)
0 /* granularity */
};
ext = (struct pcb_ext *)kmem_malloc(kernel_arena, ctob(IOPAGES+1),
M_WAITOK | M_ZERO);
ext = pmap_trm_alloc(ctob(IOPAGES + 1), M_WAITOK | M_ZERO);
/* -16 is so we can convert a trapframe into vm86trapframe inplace */
ext->ext_tss.tss_esp0 = (vm_offset_t)td->td_pcb - 16;
ext->ext_tss.tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
/*
* The last byte of the i/o map must be followed by an 0xff byte.
@@ -323,6 +321,7 @@ i386_extend_pcb(struct thread *td)
/* Switch to the new TSS. */
critical_enter();
ext->ext_tss.tss_esp0 = PCPU_GET(trampstk);
td->td_pcb->pcb_ext = ext;
PCPU_SET(private_tss, 1);
*PCPU_GET(tss_gdt) = ext->ext_tssd;
@@ -457,8 +456,8 @@ user_ldt_alloc(struct mdproc *mdp, int len)
new_ldt = malloc(sizeof(struct proc_ldt), M_SUBPROC, M_WAITOK);
new_ldt->ldt_len = len = NEW_MAX_LD(len);
new_ldt->ldt_base = (caddr_t)kmem_malloc(kernel_arena,
len * sizeof(union descriptor), M_WAITOK | M_ZERO);
new_ldt->ldt_base = pmap_trm_alloc(len * sizeof(union descriptor),
M_WAITOK | M_ZERO);
new_ldt->ldt_refcnt = 1;
new_ldt->ldt_active = 0;
@@ -473,7 +472,7 @@ user_ldt_alloc(struct mdproc *mdp, int len)
bcopy(pldt->ldt_base, new_ldt->ldt_base,
len * sizeof(union descriptor));
} else
bcopy(ldt, new_ldt->ldt_base, sizeof(ldt));
bcopy(ldt, new_ldt->ldt_base, sizeof(union descriptor) * NLDT);
return (new_ldt);
}
@@ -510,8 +509,8 @@ user_ldt_deref(struct proc_ldt *pldt)
mtx_assert(&dt_lock, MA_OWNED);
if (--pldt->ldt_refcnt == 0) {
mtx_unlock_spin(&dt_lock);
kmem_free(kernel_arena, (vm_offset_t)pldt->ldt_base,
pldt->ldt_len * sizeof(union descriptor));
pmap_trm_free(pldt->ldt_base, pldt->ldt_len *
sizeof(union descriptor));
free(pldt, M_SUBPROC);
} else
mtx_unlock_spin(&dt_lock);
@@ -767,8 +766,7 @@ i386_ldt_grow(struct thread *td, int len)
* free the new object and return.
*/
mtx_unlock_spin(&dt_lock);
kmem_free(kernel_arena,
(vm_offset_t)new_ldt->ldt_base,
pmap_trm_free(new_ldt->ldt_base,
new_ldt->ldt_len * sizeof(union descriptor));
free(new_ldt, M_SUBPROC);
mtx_lock_spin(&dt_lock);
@@ -801,8 +799,8 @@ i386_ldt_grow(struct thread *td, int len)
mtx_unlock_spin(&dt_lock);
#endif
if (old_ldt_base != NULL_LDT_BASE) {
kmem_free(kernel_arena, (vm_offset_t)old_ldt_base,
old_ldt_len * sizeof(union descriptor));
pmap_trm_free(old_ldt_base, old_ldt_len *
sizeof(union descriptor));
free(new_ldt, M_SUBPROC);
}
mtx_lock_spin(&dt_lock);


@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
*/
#include "opt_clock.h"
#include "opt_compat.h"
#include "opt_cpu.h"
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
@@ -117,45 +118,60 @@ static int trap_pfault(struct trapframe *, int, vm_offset_t);
static void trap_fatal(struct trapframe *, vm_offset_t);
void dblfault_handler(void);
extern inthand_t IDTVEC(lcall_syscall);
#define MAX_TRAP_MSG 32
static char *trap_msg[] = {
"", /* 0 unused */
"privileged instruction fault", /* 1 T_PRIVINFLT */
"", /* 2 unused */
"breakpoint instruction fault", /* 3 T_BPTFLT */
"", /* 4 unused */
"", /* 5 unused */
"arithmetic trap", /* 6 T_ARITHTRAP */
"", /* 7 unused */
"", /* 8 unused */
"general protection fault", /* 9 T_PROTFLT */
"trace trap", /* 10 T_TRCTRAP */
"", /* 11 unused */
"page fault", /* 12 T_PAGEFLT */
"", /* 13 unused */
"alignment fault", /* 14 T_ALIGNFLT */
"", /* 15 unused */
"", /* 16 unused */
"", /* 17 unused */
"integer divide fault", /* 18 T_DIVIDE */
"non-maskable interrupt trap", /* 19 T_NMI */
"overflow trap", /* 20 T_OFLOW */
"FPU bounds check fault", /* 21 T_BOUND */
"FPU device not available", /* 22 T_DNA */
"double fault", /* 23 T_DOUBLEFLT */
"FPU operand fetch fault", /* 24 T_FPOPFLT */
"invalid TSS fault", /* 25 T_TSSFLT */
"segment not present fault", /* 26 T_SEGNPFLT */
"stack fault", /* 27 T_STKFLT */
"machine check trap", /* 28 T_MCHK */
"SIMD floating-point exception", /* 29 T_XMMFLT */
"reserved (unknown) fault", /* 30 T_RESERVED */
"", /* 31 unused (reserved) */
"DTrace pid return trap", /* 32 T_DTRACE_RET */
struct trap_data {
bool ei;
const char *msg;
};
static const struct trap_data trap_data[] = {
[T_PRIVINFLT] = { .ei = true, .msg = "privileged instruction fault" },
[T_BPTFLT] = { .ei = false, .msg = "breakpoint instruction fault" },
[T_ARITHTRAP] = { .ei = true, .msg = "arithmetic trap" },
[T_PROTFLT] = { .ei = true, .msg = "general protection fault" },
[T_TRCTRAP] = { .ei = false, .msg = "trace trap" },
[T_PAGEFLT] = { .ei = true, .msg = "page fault" },
[T_ALIGNFLT] = { .ei = true, .msg = "alignment fault" },
[T_DIVIDE] = { .ei = true, .msg = "integer divide fault" },
[T_NMI] = { .ei = false, .msg = "non-maskable interrupt trap" },
[T_OFLOW] = { .ei = true, .msg = "overflow trap" },
[T_BOUND] = { .ei = true, .msg = "FPU bounds check fault" },
[T_DNA] = { .ei = true, .msg = "FPU device not available" },
[T_DOUBLEFLT] = { .ei = false, .msg = "double fault" },
[T_FPOPFLT] = { .ei = true, .msg = "FPU operand fetch fault" },
[T_TSSFLT] = { .ei = true, .msg = "invalid TSS fault" },
[T_SEGNPFLT] = { .ei = true, .msg = "segment not present fault" },
[T_STKFLT] = { .ei = true, .msg = "stack fault" },
[T_MCHK] = { .ei = true, .msg = "machine check trap" },
[T_XMMFLT] = { .ei = true, .msg = "SIMD floating-point exception" },
[T_DTRACE_RET] = { .ei = true, .msg = "DTrace pid return trap" },
};
static bool
trap_enable_intr(int trapno)
{
MPASS(trapno > 0);
if (trapno < nitems(trap_data) && trap_data[trapno].msg != NULL)
return (trap_data[trapno].ei);
return (false);
}
static const char *
trap_msg(int trapno)
{
const char *res;
static const char unkn[] = "UNKNOWN";
res = NULL;
if (trapno < nitems(trap_data))
res = trap_data[trapno].msg;
if (res == NULL)
res = unkn;
return (res);
}
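For illustration, the two helpers are meant to be consulted together; a hypothetical caller (the real consumers appear in trap() below):

static void
trap_diag_sketch(int type)
{
	if (trap_enable_intr(type) &&
	    curthread->td_md.md_spinlock_count == 0)
		enable_intr();
	printf("kernel trap %d: %s\n", type, trap_msg(type));
}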
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
int has_f00f_bug = 0; /* Initialized so that it can be patched. */
#endif
@@ -201,6 +217,9 @@ trap(struct trapframe *frame)
VM_CNT_INC(v_trap);
type = frame->tf_trapno;
KASSERT((read_eflags() & PSL_I) == 0,
("trap: interrupts enaabled, type %d frame %p", type, frame));
#ifdef SMP
/* Handler for NMI IPIs used for stopping CPUs. */
if (type == T_NMI && ipi_nmi_handler() == 0)
@@ -257,53 +276,34 @@ trap(struct trapframe *frame)
return;
#endif
if ((frame->tf_eflags & PSL_I) == 0) {
/*
* Buggy application or kernel code has disabled
* interrupts and then trapped. Enabling interrupts
* now is wrong, but it is better than running with
* interrupts disabled until they are accidentally
* enabled later.
*/
if (TRAPF_USERMODE(frame) &&
(curpcb->pcb_flags & PCB_VM86CALL) == 0)
uprintf(
"pid %ld (%s): trap %d with interrupts disabled\n",
(long)curproc->p_pid, curthread->td_name, type);
else if (type != T_NMI && type != T_BPTFLT &&
type != T_TRCTRAP &&
frame->tf_eip != (int)cpu_switch_load_gs) {
/*
* XXX not quite right, since this may be for a
* multiple fault in user mode.
*/
printf("kernel trap %d with interrupts disabled\n",
type);
/*
* Page faults need interrupts disabled until later,
* and we shouldn't enable interrupts while holding
* a spin lock.
*/
if (type != T_PAGEFLT &&
td->td_md.md_spinlock_count == 0)
enable_intr();
}
}
eva = 0;
if (type == T_PAGEFLT) {
/*
* For some Cyrix CPUs, %cr2 is clobbered by
* interrupts. This problem is worked around by using
* an interrupt gate for the pagefault handler. We
* are finally ready to read %cr2 and conditionally
* reenable interrupts. If we hold a spin lock, then
* we must not reenable interrupts. This might be a
* spurious page fault.
*/
/*
* We must not allow context switches until %cr2 is read.
* Also, for some Cyrix CPUs, %cr2 is clobbered by interrupts.
* All faults use interrupt gates, so %cr2 can be safely read
* now, before interrupts are optionally re-enabled below.
*/
if (type == T_PAGEFLT)
eva = rcr2();
if (td->td_md.md_spinlock_count == 0)
enable_intr();
}
/*
* Buggy application or kernel code has disabled interrupts
* and then trapped. Enabling interrupts now is wrong, but it
* is better than running with interrupts disabled until they
* are accidentally enabled later.
*/
if ((frame->tf_eflags & PSL_I) == 0 && TRAPF_USERMODE(frame) &&
(curpcb->pcb_flags & PCB_VM86CALL) == 0)
uprintf("pid %ld (%s): trap %d with interrupts disabled\n",
(long)curproc->p_pid, curthread->td_name, type);
/*
* Conditionally reenable interrupts. If we hold a spin lock,
* then we must not reenable interrupts. This might be a
* spurious page fault.
*/
if (trap_enable_intr(type) && td->td_md.md_spinlock_count == 0 &&
frame->tf_eip != (int)cpu_switch_load_gs)
enable_intr();
if (TRAPF_USERMODE(frame) && (curpcb->pcb_flags & PCB_VM86CALL) == 0) {
/* user trap */
@ -583,24 +583,40 @@ trap(struct trapframe *frame)
* problem here and not have to check all the
* selectors and pointers when the user changes
* them.
*
* N.B. Compared to long mode, 32-bit mode
* does not push %esp on the trap frame,
* because iretl faulted while in ring 0. As
* a consequence, there is no need to fix up
* the stack pointer for doreti_iret_fault:
* the fixup and the complementary trap() call
* are executed on the main thread stack, not
* on the trampoline stack.
*/
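/*
 * The doreti labels execute from the relocated trampoline
 * copy, so %eip is compared and patched with the
 * setidt_disp displacement applied.
 */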
if (frame->tf_eip == (int)doreti_iret) {
frame->tf_eip = (int)doreti_iret_fault;
if (frame->tf_eip == (int)doreti_iret + setidt_disp) {
frame->tf_eip = (int)doreti_iret_fault +
setidt_disp;
return;
}
if (type == T_STKFLT)
break;
if (frame->tf_eip == (int)doreti_popl_ds) {
frame->tf_eip = (int)doreti_popl_ds_fault;
if (frame->tf_eip == (int)doreti_popl_ds +
setidt_disp) {
frame->tf_eip = (int)doreti_popl_ds_fault +
setidt_disp;
return;
}
if (frame->tf_eip == (int)doreti_popl_es) {
frame->tf_eip = (int)doreti_popl_es_fault;
if (frame->tf_eip == (int)doreti_popl_es +
setidt_disp) {
frame->tf_eip = (int)doreti_popl_es_fault +
setidt_disp;
return;
}
if (frame->tf_eip == (int)doreti_popl_fs) {
frame->tf_eip = (int)doreti_popl_fs_fault;
if (frame->tf_eip == (int)doreti_popl_fs +
setidt_disp) {
frame->tf_eip = (int)doreti_popl_fs_fault +
setidt_disp;
return;
}
if (curpcb->pcb_onfault != NULL) {
@ -627,23 +643,6 @@ trap(struct trapframe *frame)
case T_TRCTRAP: /* trace trap */
kernel_trctrap:
if (frame->tf_eip == (int)IDTVEC(lcall_syscall)) {
/*
* We've just entered system mode via the
* syscall lcall. Continue single stepping
* silently until the syscall handler has
* saved the flags.
*/
return;
}
if (frame->tf_eip == (int)IDTVEC(lcall_syscall) + 1) {
/*
* The syscall handler has now saved the
* flags. Stop single stepping it.
*/
frame->tf_eflags &= ~PSL_T;
return;
}
/*
* Ignore debug register trace traps due to
* accesses in the user's address space, which
@ -711,10 +710,11 @@ trap(struct trapframe *frame)
ksi.ksi_trapno = type;
if (uprintf_signal) {
uprintf("pid %d comm %s: signal %d err %x code %d type %d "
"addr 0x%x esp 0x%08x eip 0x%08x "
"addr 0x%x ss 0x%04x esp 0x%08x cs 0x%04x eip 0x%08x "
"<%02x %02x %02x %02x %02x %02x %02x %02x>\n",
p->p_pid, p->p_comm, signo, frame->tf_err, ucode, type,
addr, frame->tf_esp, frame->tf_eip,
addr, frame->tf_ss, frame->tf_esp, frame->tf_cs,
frame->tf_eip,
fubyte((void *)(frame->tf_eip + 0)),
fubyte((void *)(frame->tf_eip + 1)),
fubyte((void *)(frame->tf_eip + 2)),
@ -791,7 +791,7 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
}
}
va = trunc_page(eva);
if (va >= KERNBASE) {
if (va >= PMAP_TRM_MIN_ADDRESS) {
/*
* Don't allow user-mode faults in kernel address space.
* An exception: if the faulting address is the invalid
@ -806,20 +806,17 @@ trap_pfault(struct trapframe *frame, int usermode, vm_offset_t eva)
#endif
if (usermode)
return (SIGSEGV);
map = kernel_map;
trap_fatal(frame, eva);
return (-1);
} else {
map = &p->p_vmspace->vm_map;
map = usermode ? &p->p_vmspace->vm_map : kernel_map;
/*
* When accessing a user-space address, kernel must be
* ready to accept the page fault, and provide a
* handling routine. Since accessing the address
* without the handler is a bug, do not try to handle
* it normally, and panic immediately.
* Kernel cannot access a user-space address directly
* because user pages are not mapped. Also, page
* faults must not occur during interrupt handling.
*/
if (!usermode && (td->td_intr_nesting_level != 0 ||
curpcb->pcb_onfault == NULL)) {
if (!usermode && td->td_intr_nesting_level != 0) {
trap_fatal(frame, eva);
return (-1);
}
@ -882,17 +879,12 @@ trap_fatal(frame, eva)
int code, ss, esp;
u_int type;
struct soft_segment_descriptor softseg;
char *msg;
code = frame->tf_err;
type = frame->tf_trapno;
sdtossd(&gdt[IDXSEL(frame->tf_cs & 0xffff)].sd, &softseg);
if (type <= MAX_TRAP_MSG)
msg = trap_msg[type];
else
msg = "UNKNOWN";
printf("\n\nFatal trap %d: %s while in %s mode\n", type, msg,
printf("\n\nFatal trap %d: %s while in %s mode\n", type, trap_msg(type),
frame->tf_eflags & PSL_VM ? "vm86" :
ISPL(frame->tf_cs) == SEL_UPL ? "user" : "kernel");
#ifdef SMP
@ -955,8 +947,8 @@ trap_fatal(frame, eva)
}
#endif
printf("trap number = %d\n", type);
if (type <= MAX_TRAP_MSG)
panic("%s", trap_msg[type]);
if (trap_msg(type) != NULL)
panic("%s", trap_msg(type));
else
panic("unknown/reserved trap");
}
@ -974,16 +966,16 @@ trap_fatal(frame, eva)
* of this is that "trace <ebp>" in ddb won't work.
*/
void
dblfault_handler()
dblfault_handler(void)
{
#ifdef KDTRACE_HOOKS
if (dtrace_doubletrap_func != NULL)
(*dtrace_doubletrap_func)();
#endif
printf("\nFatal double fault:\n");
printf("eip = 0x%x\n", PCPU_GET(common_tss.tss_eip));
printf("esp = 0x%x\n", PCPU_GET(common_tss.tss_esp));
printf("ebp = 0x%x\n", PCPU_GET(common_tss.tss_ebp));
printf("eip = 0x%x\n", PCPU_GET(common_tssp)->tss_eip);
printf("esp = 0x%x\n", PCPU_GET(common_tssp)->tss_esp);
printf("ebp = 0x%x\n", PCPU_GET(common_tssp)->tss_ebp);
#ifdef SMP
/* two separate prints in case of a trap on an unmapped page */
printf("cpuid = %d; ", PCPU_GET(cpuid));
@ -1001,13 +993,42 @@ cpu_fetch_syscall_args(struct thread *td)
caddr_t params;
long tmp;
int error;
#ifdef COMPAT_43
u_int32_t eip;
int cs;
#endif
p = td->td_proc;
frame = td->td_frame;
sa = &td->td_sa;
params = (caddr_t)frame->tf_esp + sizeof(int);
#ifdef COMPAT_43
if (__predict_false(frame->tf_cs == 7 && frame->tf_eip == 2)) {
/*
* In lcall $7,$0 after int $0x80. Convert the user
* frame to what it would be for a direct int 0x80 instead
* of lcall $7,$0, by popping the lcall return address.
*/
error = fueword32((void *)frame->tf_esp, &eip);
if (error == -1)
return (EFAULT);
cs = fuword16((void *)(frame->tf_esp + sizeof(u_int32_t)));
if (cs == -1)
return (EFAULT);
/*
* Unwind in-kernel frame after all stack frame pieces
* were successfully read.
*/
frame->tf_eip = eip;
frame->tf_cs = cs;
frame->tf_esp += 2 * sizeof(u_int32_t);
frame->tf_err = 7; /* size of lcall $7,$0 */
}
#endif
sa->code = frame->tf_eax;
params = (caddr_t)frame->tf_esp + sizeof(uint32_t);
/*
* Need to check if this is a 32 bit or 64 bit syscall.
@ -1020,7 +1041,7 @@ cpu_fetch_syscall_args(struct thread *td)
if (error == -1)
return (EFAULT);
sa->code = tmp;
params += sizeof(int);
params += sizeof(uint32_t);
} else if (sa->code == SYS___syscall) {
/*
* Like syscall, but code is a quad, so as to maintain
@ -1043,7 +1064,7 @@ cpu_fetch_syscall_args(struct thread *td)
if (params != NULL && sa->narg != 0)
error = copyin(params, (caddr_t)sa->args,
(u_int)(sa->narg * sizeof(int)));
(u_int)(sa->narg * sizeof(uint32_t)));
else
error = 0;

View File

@ -78,6 +78,55 @@ struct system_map {
#define PUSH_MASK ~(PSL_VM | PSL_RF | PSL_I)
#define POP_MASK ~(PSL_VIP | PSL_VIF | PSL_VM | PSL_RF | PSL_IOPL)
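/*
 * For kernel-initiated vm86 calls the emulator runs inside a
 * critical section (see vm86_intcall() below), where the regular
 * fubyte()/suword() family cannot be used but the vm86 frame is
 * directly addressable; dereference it in that case and fall back
 * to the regular user-access routines otherwise.
 */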
static int
vm86_suword16(volatile void *base, int word)
{
if (curthread->td_critnest != 0) {
*(volatile uint16_t *)base = word;
return (0);
}
return (suword16(base, word));
}
static int
vm86_suword(volatile void *base, long word)
{
if (curthread->td_critnest != 0) {
*(volatile long *)base = word;
return (0);
}
return (suword(base, word));
}
static int
vm86_fubyte(volatile const void *base)
{
if (curthread->td_critnest != 0)
return (*(volatile const u_char *)base);
return (fubyte(base));
}
static int
vm86_fuword16(volatile const void *base)
{
if (curthread->td_critnest != 0)
return (*(volatile const uint16_t *)base);
return (fuword16(base));
}
static long
vm86_fuword(volatile const void *base)
{
if (curthread->td_critnest != 0)
return (*(volatile const long *)base);
return (fuword(base));
}
static __inline caddr_t
MAKE_ADDR(u_short sel, u_short off)
{
@ -101,20 +150,20 @@ static __inline void
PUSH(u_short x, struct vm86frame *vmf)
{
vmf->vmf_sp -= 2;
suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
vm86_suword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}
static __inline void
PUSHL(u_int x, struct vm86frame *vmf)
{
vmf->vmf_sp -= 4;
suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
vm86_suword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp), x);
}
static __inline u_short
POP(struct vm86frame *vmf)
{
u_short x = fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
u_short x = vm86_fuword16(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
vmf->vmf_sp += 2;
return (x);
@ -123,7 +172,7 @@ POP(struct vm86frame *vmf)
static __inline u_int
POPL(struct vm86frame *vmf)
{
u_int x = fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
u_int x = vm86_fuword(MAKE_ADDR(vmf->vmf_ss, vmf->vmf_sp));
vmf->vmf_sp += 4;
return (x);
@ -152,16 +201,16 @@ vm86_emulate(struct vm86frame *vmf)
retcode = SIGTRAP;
addr = MAKE_ADDR(vmf->vmf_cs, vmf->vmf_ip);
i_byte = fubyte(addr);
i_byte = vm86_fubyte(addr);
if (i_byte == ADDRESS_SIZE_PREFIX) {
i_byte = fubyte(++addr);
i_byte = vm86_fubyte(++addr);
inc_ip++;
}
if (vm86->vm86_has_vme) {
switch (i_byte) {
case OPERAND_SIZE_PREFIX:
i_byte = fubyte(++addr);
i_byte = vm86_fubyte(++addr);
inc_ip++;
switch (i_byte) {
case PUSHF:
@ -241,7 +290,7 @@ vm86_emulate(struct vm86frame *vmf)
switch (i_byte) {
case OPERAND_SIZE_PREFIX:
i_byte = fubyte(++addr);
i_byte = vm86_fubyte(++addr);
inc_ip++;
switch (i_byte) {
case PUSHF:
@ -293,7 +342,7 @@ vm86_emulate(struct vm86frame *vmf)
return (retcode);
case INTn:
i_byte = fubyte(addr + 1);
i_byte = vm86_fubyte(addr + 1);
if ((vm86->vm86_intmap[i_byte >> 3] & (1 << (i_byte & 7))) != 0)
break;
if (vm86->vm86_eflags & PSL_VIF)
@ -303,7 +352,7 @@ vm86_emulate(struct vm86frame *vmf)
PUSH((vmf->vmf_flags & PUSH_MASK) | PSL_IOPL, vmf);
PUSH(vmf->vmf_cs, vmf);
PUSH(vmf->vmf_ip + inc_ip + 1, vmf); /* increment IP */
GET_VEC(fuword((caddr_t)(i_byte * 4)),
GET_VEC(vm86_fuword((caddr_t)(i_byte * 4)),
&vmf->vmf_cs, &vmf->vmf_ip);
vmf->vmf_flags &= ~PSL_T;
vm86->vm86_eflags &= ~PSL_VIF;
@ -548,6 +597,7 @@ vm86_prepcall(struct vm86frame *vmf)
void
vm86_trap(struct vm86frame *vmf)
{
void (*p)(struct vm86frame *);
caddr_t addr;
/* "should not happen" */
@ -560,21 +610,26 @@ vm86_trap(struct vm86frame *vmf)
else
vmf->vmf_trapno = vmf->vmf_trapno << 16;
vm86_biosret(vmf);
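/*
 * vm86_biosret lives in code that runs from its relocated
 * trampoline copy, so call it through the displaced address.
 */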
p = (void (*)(struct vm86frame *))((uintptr_t)vm86_biosret +
setidt_disp);
p(vmf);
}
int
vm86_intcall(int intnum, struct vm86frame *vmf)
{
int (*p)(struct vm86frame *);
int retval;
if (intnum < 0 || intnum > 0xff)
return (EINVAL);
vmf->vmf_trapno = intnum;
p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
setidt_disp);
mtx_lock(&vm86_lock);
critical_enter();
retval = vm86_bioscall(vmf);
retval = p(vmf);
critical_exit();
mtx_unlock(&vm86_lock);
return (retval);
@ -589,10 +644,12 @@ vm86_intcall(int intnum, struct vm86frame *vmf)
int
vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
{
pt_entry_t *pte = (pt_entry_t *)vm86paddr;
pt_entry_t *pte;
int (*p)(struct vm86frame *);
vm_paddr_t page;
int i, entry, retval;
pte = (pt_entry_t *)vm86paddr;
mtx_lock(&vm86_lock);
for (i = 0; i < vmc->npages; i++) {
page = vtophys(vmc->pmap[i].kva & PG_FRAME);
@ -603,8 +660,10 @@ vm86_datacall(int intnum, struct vm86frame *vmf, struct vm86context *vmc)
}
vmf->vmf_trapno = intnum;
p = (int (*)(struct vm86frame *))((uintptr_t)vm86_bioscall +
setidt_disp);
critical_enter();
retval = vm86_bioscall(vmf);
retval = p(vmf);
critical_exit();
for (i = 0; i < vmc->npages; i++) {

View File

@ -100,9 +100,8 @@ ENTRY(vm86_bioscall)
movl %cr3,%eax
pushl %eax /* save address space */
movl IdlePTD,%ecx
movl IdlePTD,%ecx /* va (and pa) of Idle PTD */
movl %ecx,%ebx
addl $KERNBASE,%ebx /* va of Idle PTD */
movl 0(%ebx),%eax
pushl %eax /* old ptde != 0 when booting */
pushl %ebx /* keep for reuse */
@ -119,7 +118,8 @@ ENTRY(vm86_bioscall)
movl SCR_VMFRAME(%edx),%esp /* switch to new stack */
pushl %esp
call vm86_prepcall /* finish setup */
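/* This code executes from its relocated copy, where a direct
 * relative call would be displaced; call through a register
 * holding the absolute address instead. */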
movl $vm86_prepcall, %eax
call *%eax /* finish setup */
add $4, %esp
/*

View File

@ -204,9 +204,11 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
* Create a new fresh stack for the new process.
* Copy the trap frame for the return to user mode as if from a
* syscall. This copies most of the user mode register values.
* The -16 is so we can expand the trapframe if we go to vm86.
* The -VM86_STACK_SPACE (-16) is so we can expand the trapframe
* if we go to vm86.
*/
td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb - 16) - 1;
td2->td_frame = (struct trapframe *)((caddr_t)td2->td_pcb -
VM86_STACK_SPACE) - 1;
bcopy(td1->td_frame, td2->td_frame, sizeof(struct trapframe));
td2->td_frame->tf_eax = 0; /* Child returns zero */
@ -238,7 +240,7 @@ cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags)
pcb2->pcb_ebp = 0;
pcb2->pcb_esp = (int)td2->td_frame - sizeof(void *);
pcb2->pcb_ebx = (int)td2; /* fork_trampoline argument */
pcb2->pcb_eip = (int)fork_trampoline;
pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
/*-
* pcb2->pcb_dr*: cloned above.
* pcb2->pcb_savefpu: cloned above.
@ -344,8 +346,7 @@ cpu_thread_clean(struct thread *td)
* XXX do we need to move the TSS off the allocated pages
* before freeing them? (not done here)
*/
kmem_free(kernel_arena, (vm_offset_t)pcb->pcb_ext,
ctob(IOPAGES + 1));
pmap_trm_free(pcb->pcb_ext, ctob(IOPAGES + 1));
pcb->pcb_ext = NULL;
}
}
@ -367,7 +368,8 @@ cpu_thread_alloc(struct thread *td)
struct xstate_hdr *xhdr;
td->td_pcb = pcb = get_pcb_td(td);
td->td_frame = (struct trapframe *)((caddr_t)pcb - 16) - 1;
td->td_frame = (struct trapframe *)((caddr_t)pcb -
VM86_STACK_SPACE) - 1;
pcb->pcb_ext = NULL;
pcb->pcb_save = get_pcb_user_save_pcb(pcb);
if (use_xsave) {
@ -462,7 +464,7 @@ cpu_copy_thread(struct thread *td, struct thread *td0)
pcb2->pcb_ebp = 0;
pcb2->pcb_esp = (int)td->td_frame - sizeof(void *); /* trampoline arg */
pcb2->pcb_ebx = (int)td; /* trampoline arg */
pcb2->pcb_eip = (int)fork_trampoline;
pcb2->pcb_eip = (int)fork_trampoline + setidt_disp;
pcb2->pcb_gs = rgs();
/*
* If we didn't copy the pcb, we'd need to do the following registers:
@ -581,7 +583,7 @@ sf_buf_map(struct sf_buf *sf, int flags)
*/
ptep = vtopte(sf->kva);
opte = *ptep;
*ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
*ptep = VM_PAGE_TO_PHYS(sf->m) | PG_RW | PG_V |
pmap_cache_bits(sf->m->md.pat_mode, 0);
/*

View File

@ -1,3 +1,4 @@
/* -*- mode: asm -*- */
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
@ -135,6 +136,10 @@
#endif /* GPROF */
#ifdef LOCORE
#define GSEL_KPL 0x0020 /* GSEL(GCODE_SEL, SEL_KPL) */
#define SEL_RPL_MASK 0x0003
/*
* Convenience macro for declaring interrupt entry points.
*/
@ -144,16 +149,21 @@
/*
* Macros to create and destroy a trap frame.
*/
#define PUSH_FRAME \
pushl $0 ; /* dummy error code */ \
pushl $0 ; /* dummy trap type */ \
pushal ; /* 8 ints */ \
pushl $0 ; /* save data and extra segments ... */ \
movw %ds,(%esp) ; \
pushl $0 ; \
movw %es,(%esp) ; \
pushl $0 ; \
.macro PUSH_FRAME2
pushal
pushl $0
movw %ds,(%esp)
pushl $0
movw %es,(%esp)
pushl $0
movw %fs,(%esp)
.endm
.macro PUSH_FRAME
pushl $0 /* dummy error code */
pushl $0 /* dummy trap type */
PUSH_FRAME2
.endm
/*
* Access per-CPU data.
@ -167,12 +177,43 @@
/*
* Setup the kernel segment registers.
*/
#define SET_KERNEL_SREGS \
movl $KDSEL, %eax ; /* reload with kernel's data segment */ \
movl %eax, %ds ; \
movl %eax, %es ; \
movl $KPSEL, %eax ; /* reload with per-CPU data segment */ \
.macro SET_KERNEL_SREGS
movl $KDSEL, %eax /* reload with kernel's data segment */
movl %eax, %ds
movl %eax, %es
movl $KPSEL, %eax /* reload with per-CPU data segment */
movl %eax, %fs
.endm
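/*
 * Entries from user or vm86 mode arrive on the per-CPU trampoline
 * stack. KENTER recognizes such an entry (PSL_VM set, or non-zero
 * RPL in the frame's %cs) and moves the frame to the thread kernel
 * stack: MOVE_STACKS locates tramp_idleptd PC-relatively (this code
 * executes from its relocated trampoline copy) and loads it into
 * %cr3, then NMOVE_STACKS copies the TF_SZ-sized frame, plus the
 * four vm86 segment register slots when PSL_VM is set, to below
 * PCPU(KESP0) and switches %esp to it.
 */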
.macro NMOVE_STACKS
movl PCPU(KESP0), %edx
movl $TF_SZ, %ecx
testl $PSL_VM, TF_EFLAGS(%esp)
jz 1001f
addl $(4*4), %ecx
1001: subl %ecx, %edx
movl %edx, %edi
movl %esp, %esi
rep; movsb
movl %edx, %esp
.endm
.macro MOVE_STACKS
call 1000f
1000: popl %eax
movl (tramp_idleptd - 1000b)(%eax), %eax
movl %eax, %cr3
NMOVE_STACKS
.endm
.macro KENTER
testl $PSL_VM, TF_EFLAGS(%esp)
jnz 1f
testb $SEL_RPL_MASK, TF_CS(%esp)
jz 2f
1: MOVE_STACKS
2:
.endm
#endif /* LOCORE */

View File

@ -41,4 +41,8 @@
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
#define EFL_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
#define TRAMP_STACK_SZ 4096 /* per-CPU trampoline stack size */
#define TRAMP_COPYOUT_SZ 128 /* per-CPU copyout buffer in the trampoline */
#define VM86_STACK_SPACE 16 /* room to expand the trapframe for vm86 */
#endif /* _I386_FRAME_H_ */

View File

@ -45,14 +45,18 @@ extern int szfreebsd4_sigcode;
#endif
#ifdef COMPAT_43
extern int szosigcode;
extern int sz_lcall_tramp;
#endif
extern uint32_t *vm_page_dump;
extern vm_offset_t proc0kstack;
extern uintptr_t setidt_disp;
struct segment_descriptor;
union savefpu;
void bcopyb(const void *from, void *to, size_t len);
void cpu_switch_load_gs(void) __asm(__STRING(cpu_switch_load_gs));
void copyout_init_tramp(void);
void doreti_iret(void) __asm(__STRING(doreti_iret));
void doreti_iret_fault(void) __asm(__STRING(doreti_iret_fault));
void doreti_popl_ds(void) __asm(__STRING(doreti_popl_ds));
@ -71,6 +75,7 @@ void ppro_reenable_apic(void);
void set_fsbase(struct thread *td, uint32_t base);
void set_gsbase(struct thread *td, uint32_t base);
void setidt(int idx, alias_for_inthand_t *func, int typ, int dpl, int selec);
void setidt_nodisp(int idx, uintptr_t func, int typ, int dpl, int selec);
union savefpu *get_pcb_user_save_td(struct thread *td);
union savefpu *get_pcb_user_save_pcb(struct pcb *pcb);

View File

@ -164,7 +164,6 @@
#define pgtok(x) ((x) * (PAGE_SIZE / 1024))
#define INKERNEL(va) (((vm_offset_t)(va)) >= VM_MAXUSER_ADDRESS && \
((vm_offset_t)(va)) < VM_MAX_KERNEL_ADDRESS)
#define INKERNEL(va) (TRUE)
#endif /* !_I386_INCLUDE_PARAM_H_ */

View File

@ -267,8 +267,8 @@ struct smbios_structure_header {
};
#ifdef _KERNEL
#define BIOS_PADDRTOVADDR(x) ((x) + KERNBASE)
#define BIOS_VADDRTOPADDR(x) ((x) - KERNBASE)
#define BIOS_PADDRTOVADDR(x) ((x) + PMAP_MAP_LOW)
#define BIOS_VADDRTOPADDR(x) ((x) - PMAP_MAP_LOW)
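/*
 * The low 1M BIOS area is now reached through the fixed low-memory
 * mapping at PMAP_MAP_LOW rather than through KERNBASE.
 */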
struct bios_oem_signature {
char * anchor; /* search anchor string in BIOS memory */

View File

@ -42,21 +42,23 @@
#include <sys/_mutex.h>
/*
* The SMP parts are setup in pmap.c and locore.s for the BSP, and
* mp_machdep.c sets up the data for the AP's to "see" when they awake.
* The reason for doing it via a struct is so that an array of pointers
* to each CPU's data can be set up for things like "check curproc on all
* other processors"
* The SMP parts are set up in pmap.c and machdep.c for the BSP, and
* pmap.c and mp_machdep.c set up the data for the APs to "see" when
* they awake. The reason for doing it via a struct is so that an
* array of pointers to each CPU's data can be set up for things like
* "check curproc on all other processors"
*/
#define PCPU_MD_FIELDS \
char pc_monitorbuf[128] __aligned(128); /* cache line */ \
struct pcpu *pc_prvspace; /* Self-reference */ \
struct pmap *pc_curpmap; \
struct i386tss pc_common_tss; \
struct segment_descriptor pc_common_tssd; \
struct segment_descriptor *pc_tss_gdt; \
struct segment_descriptor *pc_fsgs_gdt; \
struct i386tss *pc_common_tssp; \
u_int pc_kesp0; /* kernel stack top, frame copy target */ \
u_int pc_trampstk; /* per-CPU trampoline stack */ \
int pc_currentldt; \
u_int pc_acpi_id; /* ACPI CPU id */ \
u_int pc_apic_id; \
@ -69,8 +71,13 @@
caddr_t pc_cmap_addr1; \
caddr_t pc_cmap_addr2; \
vm_offset_t pc_qmap_addr; /* KVA for temporary mappings */\
vm_offset_t pc_copyout_maddr; /* copyout pte va, mtx-locked use */ \
vm_offset_t pc_copyout_saddr; /* copyout pte va, sx-locked use */ \
struct mtx pc_copyout_mlock; \
struct sx pc_copyout_slock; /* sleepable, for vslock()ed buffers */ \
char *pc_copyout_buf; /* per-CPU copyout bounce buffer */ \
uint32_t pc_smp_tlb_done; /* TLB op acknowledgement */ \
char __pad[445]
char __pad[550]
#ifdef _KERNEL

View File

@ -112,12 +112,10 @@
* For PAE, the page table page unit size is 2MB. This means that 512 pages
* is 1 Gigabyte. Double everything. It must be a multiple of 8 for PAE.
*/
#ifndef KVA_PAGES
#if defined(PAE) || defined(PAE_TABLES)
#define KVA_PAGES 512
#define KVA_PAGES (512*4)
#else
#define KVA_PAGES 256
#endif
#define KVA_PAGES (256*4)
#endif
/*
@ -150,12 +148,13 @@
/*
* The *PTDI values control the layout of virtual memory
*
* XXX This works for now, but I am not real happy with it, I'll fix it
* right after I fix locore.s and the magic 28K hole
*/
#define KPTDI (NPDEPTD-NKPDE) /* start of kernel virtual pde's */
#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */
#define KPTDI 0 /* start of kernel virtual pde's */
#define LOWPTDI 1 /* low memory map pde */
#define KERNPTDI 2 /* start of kernel text pde */
#define PTDPTDI (NPDEPTD - 1 - NPGPTD) /* ptd entry that points
to ptd! */
#define TRPTDI (NPDEPTD - 1) /* u/k trampoline ptd */
/*
* XXX doesn't really belong here I guess...
@ -311,6 +310,7 @@ struct pmap {
table */
#endif
struct vm_radix pm_root; /* spare page table pages */
vm_page_t pm_ptdpg[NPGPTD];
};
typedef struct pmap *pmap_t;
@ -396,6 +396,8 @@ void pmap_invalidate_cache(void);
void pmap_invalidate_cache_pages(vm_page_t *pages, int count);
void pmap_invalidate_cache_range(vm_offset_t sva, vm_offset_t eva,
boolean_t force);
void *pmap_trm_alloc(size_t size, int flags);
void pmap_trm_free(void *addr, size_t size);
void invltlb_glob(void);

View File

@ -84,11 +84,10 @@ struct region_descriptor {
#ifdef _KERNEL
extern int _default_ldt;
extern union descriptor gdt[];
extern union descriptor ldt[NLDT];
extern union descriptor *gdt;
extern union descriptor *ldt;
extern struct soft_segment_descriptor gdt_segs[];
extern struct gate_descriptor *idt;
extern struct region_descriptor r_gdt, r_idt;
void lgdt(struct region_descriptor *rdp);
void sdtossd(struct segment_descriptor *sdp,

View File

@ -136,7 +136,7 @@
* Kernel physical load address.
*/
#ifndef KERNLOAD
#define KERNLOAD (1 << PDRSHIFT)
#define KERNLOAD (KERNPTDI << PDRSHIFT)
#endif /* !defined(KERNLOAD) */
/*
@ -146,23 +146,47 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
#define VM_MAX_KERNEL_ADDRESS VADDR(KPTDI+NKPDE-1, NPTEPG-1)
#define VM_MAX_KERNEL_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MIN_KERNEL_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define VM_MIN_KERNEL_ADDRESS 0
#define KERNBASE VADDR(KPTDI, 0)
#define KERNBASE KERNLOAD
#define UPT_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define UPT_MIN_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MAXUSER_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MAXUSER_ADDRESS VADDR(TRPTDI, 0)
#define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE)
#define USRSTACK SHAREDPAGE
#define VM_MAX_ADDRESS VADDR(PTDPTDI, PTDPTDI)
#define VM_MAX_ADDRESS VADDR(PTDPTDI, 0)
#define VM_MIN_ADDRESS ((vm_offset_t)0)
#define PMAP_TRM_MIN_ADDRESS VM_MAXUSER_ADDRESS
#define PMAP_TRM_MAX_ADDRESS 0xffffffff
#define PMAP_MAP_LOW VADDR(LOWPTDI, 0)
/*
* KVA layout. The unit of system allocation is a single PDE, which
* represents NBPDR bytes, aligned to NBPDR. NBPDR is 4M for non-PAE
* page tables, and 2M for PAE. Addresses below are shown for non-PAE.
*
* 0x00000000 - 0x003fffff Transient identity map of low memory (0-4M),
* normally disabled to catch NULL derefs.
* 0x00400000 - 0x007fffff Fixed mapping of the low memory (0-4M).
* 0x00800000 - 0xffbfffff KERNBASE (VA) == KERNLOAD (PA), kernel
* text + data and all kernel maps. Managed
* by MI VM.
* 0xffc00000 - 0xffdfffff Recursive kernel page table mapping, pointed
* to by PTmap. PTD[] recursively points
* into PTmap.
* 0xffe00000 - 0xffffffff Kernel/User mode shared PDE, contains GDT,
* IDT, TSS, LDT, trampoline code and stacks.
* Managed by pmap_trm_alloc().
*/
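As a quick sanity check of the PDE arithmetic behind the low boundaries above, here is a minimal userland sketch. It assumes the conventional non-PAE constants (PDRSHIFT 22, PAGE_SHIFT 12) and that VADDR(pdi, pti) composes (pdi << PDRSHIFT) | (pti << PAGE_SHIFT) as in the i386 headers; the actual definition is outside this hunk.
#include <stdio.h>
/* Assumed constants: PDRSHIFT = log2(NBPDR) = 22 for non-PAE 4M PDEs. */
#define PDRSHIFT 22
#define PAGE_SHIFT 12
#define VADDR(pdi, pti) \
    (((unsigned)(pdi) << PDRSHIFT) | ((unsigned)(pti) << PAGE_SHIFT))
int
main(void)
{
	/* LOWPTDI = 1: the fixed low-memory mapping. */
	printf("PMAP_MAP_LOW = %#010x\n", VADDR(1, 0)); /* 0x00400000 */
	/* KERNPTDI = 2: KERNBASE (VA) == KERNLOAD (PA). */
	printf("KERNBASE     = %#010x\n", VADDR(2, 0)); /* 0x00800000 */
	return (0);
}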
/*
* How many physical pages per kmem arena virtual page.
*/

View File

@ -67,7 +67,12 @@ __FBSDID("$FreeBSD$");
static int exec_aout_imgact(struct image_params *imgp);
static int aout_fixup(register_t **stack_base, struct image_params *imgp);
#define AOUT32_USRSTACK 0xbfc00000
#if defined(__i386__)
#define AOUT32_PS_STRINGS (AOUT32_USRSTACK - sizeof(struct ps_strings))
struct sysentvec aout_sysvec = {
.sv_size = SYS_MAXSYSCALL,
.sv_table = sysent,
@ -85,9 +90,9 @@ struct sysentvec aout_sysvec = {
.sv_minsigstksz = MINSIGSTKSZ,
.sv_pagesize = PAGE_SIZE,
.sv_minuser = VM_MIN_ADDRESS,
.sv_maxuser = VM_MAXUSER_ADDRESS,
.sv_usrstack = USRSTACK,
.sv_psstrings = PS_STRINGS,
.sv_maxuser = AOUT32_USRSTACK,
.sv_usrstack = AOUT32_USRSTACK,
.sv_psstrings = AOUT32_PS_STRINGS,
.sv_stackprot = VM_PROT_ALL,
.sv_copyout_strings = exec_copyout_strings,
.sv_setregs = exec_setregs,
@ -104,10 +109,9 @@ struct sysentvec aout_sysvec = {
#elif defined(__amd64__)
#define AOUT32_USRSTACK 0xbfc00000
#define AOUT32_PS_STRINGS \
(AOUT32_USRSTACK - sizeof(struct freebsd32_ps_strings))
#define AOUT32_MINUSER FREEBSD32_MINUSER
#define AOUT32_MINUSER FREEBSD32_MINUSER
extern const char *freebsd32_syscallnames[];
extern u_long ia32_maxssiz;

View File

@ -480,7 +480,9 @@ static int w_max_used_index = 0;
static unsigned int w_generation = 0;
static const char w_notrunning[] = "Witness not running\n";
static const char w_stillcold[] = "Witness is still cold\n";
#ifdef __i386__
static const char w_notallowed[] = "The sysctl is disabled on the arch\n";
#endif
static struct witness_order_list_entry order_lists[] = {
/*
@ -2779,6 +2781,11 @@ sysctl_debug_witness_fullgraph(SYSCTL_HANDLER_ARGS)
struct sbuf *sb;
int error;
#ifdef __i386__
error = SYSCTL_OUT(req, w_notallowed, sizeof(w_notallowed));
return (error);
#endif
if (witness_watch < 1) {
error = SYSCTL_OUT(req, w_notrunning, sizeof(w_notrunning));
return (error);

View File

@ -141,8 +141,13 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
}
#define WARMBOOT_TARGET 0
#ifdef __amd64__
#define WARMBOOT_OFF (KERNBASE + 0x0467)
#define WARMBOOT_SEG (KERNBASE + 0x0469)
#else /* __i386__ */
#define WARMBOOT_OFF (PMAP_MAP_LOW + 0x0467)
#define WARMBOOT_SEG (PMAP_MAP_LOW + 0x0469)
#endif
#define CMOS_REG (0x70)
#define CMOS_DATA (0x71)
@ -186,7 +191,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc)
* cpususpend_handler() and we will release them soon. Then each
* will invalidate its TLB.
*/
kernel_pmap->pm_pdir[0] = 0;
PTD[KPTDI] = 0;
invltlb_glob();
#endif
@ -256,7 +261,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
* be careful to use the kernel map (PTD[0] is for curthread
* which may be a user thread in deprecated APIs).
*/
kernel_pmap->pm_pdir[0] = PTD[KPTDI];
PTD[KPTDI] = PTD[LOWPTDI];
#endif
/* Call ACPICA to enter the desired sleep state */

View File

@ -78,11 +78,9 @@ __FBSDID("$FreeBSD$");
#ifdef __amd64__
#define SDT_APIC SDT_SYSIGT
#define SDT_APICT SDT_SYSIGT
#define GSEL_APIC 0
#else
#define SDT_APIC SDT_SYS386IGT
#define SDT_APICT SDT_SYS386TGT
#define GSEL_APIC GSEL(GCODE_SEL, SEL_KPL)
#endif
@ -517,7 +515,7 @@ native_lapic_init(vm_paddr_t addr)
/* Local APIC CMCI. */
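/*
 * Presumably the i386 trampoline entry cannot tolerate interrupts
 * before the stack and %cr3 switch, so interrupt gates (SDT_APIC)
 * replace trap gates (SDT_APICT) for the APIC vectors here.
 */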
setidt(APIC_CMC_INT, pti ? IDTVEC(cmcint_pti) : IDTVEC(cmcint),
SDT_APICT, SEL_KPL, GSEL_APIC);
SDT_APIC, SEL_KPL, GSEL_APIC);
if ((resource_int_value("apic", 0, "clock", &i) != 0 || i != 0)) {
arat = 0;
@ -1605,7 +1603,7 @@ native_apic_disable_vector(u_int apic_id, u_int vector)
* We can not currently clear the idt entry because other cpus
* may have a valid vector at this offset.
*/
setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
SEL_KPL, GSEL_APIC);
#endif
}
@ -2146,7 +2144,7 @@ native_lapic_ipi_free(int vector)
KASSERT(func != (uintptr_t)&IDTVEC(rsvd) &&
func != (uintptr_t)&IDTVEC(rsvd_pti),
("invalid idtfunc %#lx", func));
setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APICT,
setidt(vector, pti ? &IDTVEC(rsvd_pti) : &IDTVEC(rsvd), SDT_APIC,
SEL_KPL, GSEL_APIC);
mtx_unlock_spin(&icu_lock);
}

View File

@ -1686,8 +1686,10 @@ invltlb_handler(void)
generation = smp_tlb_generation;
if (smp_tlb_pmap == kernel_pmap)
invltlb_glob();
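/*
 * On i386, a user pmap needs no explicit flush here: the IPI
 * handler entered the kernel through the trampoline, and the
 * accompanying page table switch already flushed the user TLB
 * entries (there is no PCID in protected mode).
 */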
#ifdef __amd64__
else
invltlb();
#endif
PCPU_SET(smp_tlb_done, generation);
}
@ -1704,7 +1706,10 @@ invlpg_handler(void)
#endif /* COUNT_IPIS */
generation = smp_tlb_generation; /* Overlap with serialization */
invlpg(smp_tlb_addr1);
#ifdef __i386__
if (smp_tlb_pmap == kernel_pmap)
#endif
invlpg(smp_tlb_addr1);
PCPU_SET(smp_tlb_done, generation);
}
@ -1724,10 +1729,13 @@ invlrng_handler(void)
addr = smp_tlb_addr1;
addr2 = smp_tlb_addr2;
generation = smp_tlb_generation; /* Overlap with serialization */
do {
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
#ifdef __i386__
if (smp_tlb_pmap == kernel_pmap)
#endif
do {
invlpg(addr);
addr += PAGE_SIZE;
} while (addr < addr2);
PCPU_SET(smp_tlb_done, generation);
}

View File

@ -221,8 +221,13 @@ static int
search_for_sig(u_int32_t target, int count)
{
int x;
u_int32_t *addr = (u_int32_t *) (KERNBASE + target);
u_int32_t *addr;
#ifdef __amd64__
addr = (u_int32_t *) (KERNBASE + target);
#else /* __i386__ */
addr = (u_int32_t *) (PMAP_MAP_LOW + target);
#endif
for (x = 0; x < count; x += 4)
if (addr[x] == MP_SIG)
/* make array index a byte index */
@ -253,7 +258,13 @@ mptable_probe(void)
u_int32_t target;
/* see if EBDA exists */
if ((segment = (u_long) * (u_short *) (KERNBASE + 0x40e)) != 0) {
if ((segment = (u_long) * (u_short *) (
#ifdef __amd64__
KERNBASE
#else /* __i386__ */
PMAP_MAP_LOW
#endif
+ 0x40e)) != 0) {
/* search first 1K of EBDA */
target = (u_int32_t) (segment << 4);
if ((x = search_for_sig(target, 1024 / 4)) >= 0)