d86c1f0dc1
The change makes the user and kernel address spaces on i386 independent, giving each almost the full 4G of usable virtual addresses, except for one PDE at the top used for the trampoline and per-CPU trampoline stacks, and for system structures that must always be mapped, namely the IDT, GDT, common TSS and LDT, and the process-private TSS and LDT if allocated. By using a 1:1 mapping for the kernel text and data, it turned out to be possible to eliminate the assembler part of locore.S which bootstraps the initial page table and KPTmap. The code is rewritten in C and moved into pmap_cold(). The comment in vmparam.h explains the KVA layout. There is no PCID mechanism available in protected mode, so each kernel/user switch back and forth completely flushes the TLB, except for the trampoline PTD region. The TLB invalidations for userspace become trivial, because IPI handlers switch page tables. On the other hand, context switches no longer need to reload %cr3. copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for the new copyout(9) is compatibility with wiring user buffers around sysctl handlers. This explains the two kinds of locks for copyout ptes and the accounting of the vslock() calls. The vm_fault_quick_hold() path, AKA the slow path, is only tried after the 'fast path' has failed; the fast path temporarily changes the mapping to the userspace one and copies the data to/from a small per-CPU buffer in the trampoline. If a page fault occurs during the copy, it is short-circuited by exception.s so that it does not even reach C code. The change was motivated by the need to implement the Meltdown mitigation, but instead of KPTI the full split is done. The i386 architecture already shows sizing problems; in particular, it is impossible to link clang and lld with debugging. I expect that the issues due to the virtual address space limits will only get worse, and the split gives the platform more life. 
Tested by: pho Discussed with: bde Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D14633
199 lines
7.4 KiB
Plaintext
199 lines
7.4 KiB
Plaintext
/* $FreeBSD$ */
|
|
/*
 * Output file properties.  OUTPUT_FORMAT takes the default, big-endian and
 * little-endian BFD target names; the same i386 FreeBSD ELF target is given
 * for all three.
 */
OUTPUT_FORMAT("elf32-i386-freebsd",
              "elf32-i386-freebsd",
              "elf32-i386-freebsd")
OUTPUT_ARCH(i386)

/* The entry point of the linked image is the symbol btext. */
ENTRY(btext)

/* Add /usr/lib to the linker's library search path. */
SEARCH_DIR(/usr/lib);
|
|
SECTIONS
|
|
{
|
|
/*
 * Read-only sections, merged into the text segment.
 *
 * Layout starts at kernbase plus the size of the ELF headers.  kernbase is
 * not assigned anywhere in this script, so it has to be defined outside it.
 */
. = kernbase + SIZEOF_HEADERS;

/* Interpreter name, symbol hash tables, dynamic symbols and versioning. */
.interp         : { *(.interp) }
.hash           : { *(.hash) }
.gnu.hash       : { *(.gnu.hash) }
.dynsym         : { *(.dynsym) }
.dynstr         : { *(.dynstr) }
.gnu.version    : { *(.gnu.version) }
.gnu.version_d  : { *(.gnu.version_d) }
.gnu.version_r  : { *(.gnu.version_r) }
|
|
/*
 * Relocation tables.  Each family of relocated sections gets a pair of
 * output sections: one for REL entries (.rel.*) and one for RELA entries
 * (.rela.*).
 */
.rel.init          : { *(.rel.init) }
.rela.init         : { *(.rela.init) }
.rel.text          : { *(.rel.text .rel.text.* .rel.gnu.linkonce.t.*) }
.rela.text         : { *(.rela.text .rela.text.* .rela.gnu.linkonce.t.*) }
.rel.fini          : { *(.rel.fini) }
.rela.fini         : { *(.rela.fini) }
.rel.rodata        : { *(.rel.rodata .rel.rodata.* .rel.gnu.linkonce.r.*) }
.rela.rodata       : { *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.*) }
.rel.data.rel.ro   : { *(.rel.data.rel.ro* .rel.gnu.linkonce.d.rel.ro.*) }
.rela.data.rel.ro  : { *(.rela.data.rel.ro* .rela.gnu.linkonce.d.rel.ro.*) }
.rel.data          : { *(.rel.data .rel.data.* .rel.gnu.linkonce.d.*) }
.rela.data         : { *(.rela.data .rela.data.* .rela.gnu.linkonce.d.*) }
.rel.tdata         : { *(.rel.tdata .rel.tdata.* .rel.gnu.linkonce.td.*) }
.rela.tdata        : { *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.*) }
.rel.tbss          : { *(.rel.tbss .rel.tbss.* .rel.gnu.linkonce.tb.*) }
.rela.tbss         : { *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.*) }
.rel.ctors         : { *(.rel.ctors) }
.rela.ctors        : { *(.rela.ctors) }
.rel.dtors         : { *(.rel.dtors) }
.rela.dtors        : { *(.rela.dtors) }
.rel.got           : { *(.rel.got) }
.rela.got          : { *(.rela.got) }
.rel.bss           : { *(.rel.bss .rel.bss.* .rel.gnu.linkonce.b.*) }
.rela.bss          : { *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.*) }
.rel.plt           : { *(.rel.plt) }
.rela.plt          : { *(.rela.plt) }
|
|
/*
 * Executable code.  The =0xCCCCCCCC fill expression pads any gaps inside
 * .init, .text and .fini with 0xCC bytes.
 */
.init : { KEEP (*(.init)) } =0xCCCCCCCC

.plt : { *(.plt) }

.text :
{
  *(.text .stub .text.* .gnu.linkonce.t.*)
  KEEP (*(.text.*personality*))
  /* .gnu.warning sections are handled specially by elf32.em. */
  *(.gnu.warning)
} =0xCCCCCCCC

.fini : { KEEP (*(.fini)) } =0xCCCCCCCC

/*
 * End-of-text markers.  PROVIDE defines a symbol only when it is referenced
 * and no input object defines it.
 */
PROVIDE (__etext = .);
PROVIDE (_etext = .);
PROVIDE (etext = .);
|
|
/* Read-only data. */
.rodata  : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
.rodata1 : { *(.rodata1) }

/*
 * Exception-handling tables.  The ONLY_IF_RO output sections are created
 * only when all of their input sections are read-only.
 */
.eh_frame_hdr     : { *(.eh_frame_hdr) }
.eh_frame         : ONLY_IF_RO { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RO { *(.gcc_except_table .gcc_except_table.*) }
|
|
/*
 * Adjust the address for the data segment: move up to the same offset
 * within the page, but on the next page up.
 */
. = ALIGN (CONSTANT (MAXPAGESIZE))
    - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1));
. = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE));

/*
 * Exception handling.  The ONLY_IF_RW output sections are created only when
 * all of their input sections are read-write.
 */
.eh_frame         : ONLY_IF_RW { KEEP (*(.eh_frame)) }
.gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.*) }
|
|
/* Thread Local Storage sections. */
.tdata : { *(.tdata .tdata.* .gnu.linkonce.td.*) }
.tbss  : { *(.tbss .tbss.* .gnu.linkonce.tb.*) *(.tcommon) }

/*
 * Pre-initialiser, initialiser and finaliser pointer arrays.  Each array is
 * bracketed by hidden __*_start / __*_end symbols, and KEEP stops the
 * entries from being garbage-collected.
 */
.preinit_array :
{
  PROVIDE_HIDDEN (__preinit_array_start = .);
  KEEP (*(.preinit_array))
  PROVIDE_HIDDEN (__preinit_array_end = .);
}

/* Sorted .init_array.* entries come before the plain .init_array ones. */
.init_array :
{
  PROVIDE_HIDDEN (__init_array_start = .);
  KEEP (*(SORT(.init_array.*)))
  KEEP (*(.init_array))
  PROVIDE_HIDDEN (__init_array_end = .);
}

/* Plain .fini_array entries come before the sorted .fini_array.* ones. */
.fini_array :
{
  PROVIDE_HIDDEN (__fini_array_start = .);
  KEEP (*(.fini_array))
  KEEP (*(SORT(.fini_array.*)))
  PROVIDE_HIDDEN (__fini_array_end = .);
}
|
|
/* Constructor table, bracketed by start/stop symbols. */
_start_ctors = .;
PROVIDE (start_ctors = .);
.ctors :
{
  /*
   * gcc locates the start of the constructors through crtbegin.o, so its
   * .ctors contribution has to come first.  These are wildcards, so
   * nothing breaks if crtbegin.o is not part of the link (the linker does
   * not go looking for a file to satisfy a wildcard), and the directory
   * crtbegin.o lives in does not matter.
   */
  KEEP (*crtbegin.o(.ctors))
  KEEP (*crtbegin?.o(.ctors))
  /*
   * The .ctors section from crtend.o holds the end-of-ctors marker and
   * must be last, so it is excluded here and only picked up after the
   * sorted ctors.
   */
  KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .ctors))
  KEEP (*(SORT(.ctors.*)))
  KEEP (*(.ctors))
}
_stop_ctors = .;
PROVIDE (stop_ctors = .);

/* Destructor table, collected in the same order as .ctors. */
.dtors :
{
  KEEP (*crtbegin.o(.dtors))
  KEEP (*crtbegin?.o(.dtors))
  KEEP (*(EXCLUDE_FILE (*crtend.o *crtend?.o ) .dtors))
  KEEP (*(SORT(.dtors.*)))
  KEEP (*(.dtors))
}
|
|
/* Sections placed ahead of the DATA_SEGMENT_RELRO_END marker below. */
.jcr : { KEEP (*(.jcr)) }

.data.rel.ro :
{
  *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.*)
  *(.data.rel.ro* .gnu.linkonce.d.rel.ro.*)
}

.dynamic : { *(.dynamic) }
.got     : { *(.got) }

/*
 * End of the RELRO region.
 * NOTE(review): the offset 12 presumably accounts for the first three
 * 4-byte .got.plt words -- confirm against the ld documentation.
 */
. = DATA_SEGMENT_RELRO_END (12, .);

.got.plt : { *(.got.plt) }

/* Ordinary initialised, writable data. */
.data :
{
  *(.data .data.* .gnu.linkonce.d.*)
  KEEP (*(.gnu.linkonce.d.*personality*))
}

.data1 : { *(.data1) }

/* End of initialised data. */
_edata = .;
PROVIDE (edata = .);
|
|
/*
 * Zero-initialised data.  __bss_start is set immediately before .bss;
 * _end / end are set just past it, after alignment.
 */
__bss_start = .;

.bss :
{
  *(.dynbss)
  *(.bss .bss.* .gnu.linkonce.b.*)
  *(COMMON)
  /*
   * Align here to ensure that the .bss section occupies space up to _end.
   * Align after .bss to ensure correct alignment even if the .bss section
   * disappears because there are no input sections.
   * FIXME: Why do we need it?  When there is no .bss section, we don't
   * pad the .data section.
   */
  . = ALIGN(. != 0 ? 32 / 8 : 1);
}

/*
 * Round the location counter up to 32 / 8 = 4 bytes.  This statement used
 * to appear twice in a row; aligning an already-aligned location counter
 * is a no-op, so it is stated once.
 */
. = ALIGN(32 / 8);

_end = .;
PROVIDE (end = .);

. = DATA_SEGMENT_END (.);
|
|
/* Stabs debugging sections. */
.stab            0 : { *(.stab) }
.stabstr         0 : { *(.stabstr) }
.stab.excl       0 : { *(.stab.excl) }
.stab.exclstr    0 : { *(.stab.exclstr) }
.stab.index      0 : { *(.stab.index) }
.stab.indexstr   0 : { *(.stab.indexstr) }

.comment         0 : { *(.comment) }

/*
 * DWARF debug sections.
 * Symbols in the DWARF debugging sections are relative to the beginning of
 * the section, so each section is placed at address 0.
 */

/* DWARF 1 */
.debug           0 : { *(.debug) }
.line            0 : { *(.line) }

/* GNU DWARF 1 extensions */
.debug_srcinfo   0 : { *(.debug_srcinfo) }
.debug_sfnames   0 : { *(.debug_sfnames) }

/* DWARF 1.1 and DWARF 2 */
.debug_aranges   0 : { *(.debug_aranges) }
.debug_pubnames  0 : { *(.debug_pubnames) }

/* DWARF 2 */
.debug_info      0 : { *(.debug_info .gnu.linkonce.wi.*) }
.debug_abbrev    0 : { *(.debug_abbrev) }
.debug_line      0 : { *(.debug_line) }
.debug_frame     0 : { *(.debug_frame) }
.debug_str       0 : { *(.debug_str) }
.debug_loc       0 : { *(.debug_loc) }
.debug_macinfo   0 : { *(.debug_macinfo) }

/* SGI/MIPS DWARF 2 extensions */
.debug_weaknames 0 : { *(.debug_weaknames) }
.debug_funcnames 0 : { *(.debug_funcnames) }
.debug_typenames 0 : { *(.debug_typenames) }
.debug_varnames  0 : { *(.debug_varnames) }

/* DWARF 3 */
.debug_pubtypes  0 : { *(.debug_pubtypes) }
.debug_ranges    0 : { *(.debug_ranges) }

.gnu.attributes  0 : { KEEP (*(.gnu.attributes)) }

/* Input sections listed under /DISCARD/ are left out of the output file. */
/DISCARD/ : { *(.note.GNU-stack) }
|
|
}
|