1998-04-06 15:37:21 +00:00
|
|
|
/*-
|
2017-11-27 15:08:52 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
|
|
*
|
2006-05-12 22:41:58 +00:00
|
|
|
* Copyright (c) Peter Wemm
|
1998-04-06 15:37:21 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1999-08-28 01:08:13 +00:00
|
|
|
* $FreeBSD$
|
1998-04-06 15:37:21 +00:00
|
|
|
*/
|
|
|
|
|
2001-12-11 23:33:44 +00:00
|
|
|
#ifndef _MACHINE_PCPU_H_
|
2007-02-06 18:04:02 +00:00
|
|
|
#define _MACHINE_PCPU_H_
|
2000-09-07 01:33:02 +00:00
|
|
|
|
2005-03-02 21:33:29 +00:00
|
|
|
#ifndef _SYS_CDEFS_H_
|
2007-02-06 18:04:02 +00:00
|
|
|
#error "sys/cdefs.h is a prerequisite for this file"
|
2005-03-02 21:33:29 +00:00
|
|
|
#endif
|
|
|
|
|
2000-09-07 01:33:02 +00:00
|
|
|
#include <machine/segments.h>
|
|
|
|
#include <machine/tss.h>
|
|
|
|
|
2016-12-23 15:14:56 +00:00
|
|
|
#include <sys/_lock.h>
|
|
|
|
#include <sys/_mutex.h>
|
|
|
|
|
1998-04-06 15:37:21 +00:00
|
|
|
/*
|
i386 4/4G split.
The change makes the user and kernel address spaces on i386
independent, giving each almost the full 4G of usable virtual addresses
except for one PDE at top used for trampoline and per-CPU trampoline
stacks, and system structures that must be always mapped, namely IDT,
GDT, common TSS and LDT, and process-private TSS and LDT if allocated.
By using 1:1 mapping for the kernel text and data, it appeared
possible to eliminate assembler part of the locore.S which bootstraps
initial page table and KPTmap. The code is rewritten in C and moved
into the pmap_cold(). The comment in vmparam.h explains the KVA
layout.
There is no PCID mechanism available in protected mode, so each
kernel/user switch forth and back completely flushes the TLB, except
for the trampoline PTD region. The TLB invalidations for userspace
becomes trivial, because IPI handlers switch page tables. On the other
hand, context switches no longer need to reload %cr3.
copyout(9) was rewritten to use vm_fault_quick_hold(). An issue for
new copyout(9) is compatibility with wiring user buffers around sysctl
handlers. This explains two kind of locks for copyout ptes and
accounting of the vslock() calls. The vm_fault_quick_hold() AKA slow
path, is only tried after the 'fast path' failed, which temporary
changes mapping to the userspace and copies the data to/from small
per-cpu buffer in the trampoline. If a page fault occurs during the
copy, it is short-circuit by exception.s to not even reach C code.
The change was motivated by the need to implement the Meltdown
mitigation, but instead of KPTI the full split is done. The i386
architecture already shows the sizing problems, in particular, it is
impossible to link clang and lld with debugging. I expect that the
issues due to the virtual address space limits would only exaggerate
and the split gives more liveness to the platform.
Tested by: pho
Discussed with: bde
Sponsored by: The FreeBSD Foundation
MFC after: 1 month
Differential revision: https://reviews.freebsd.org/D14633
2018-04-13 20:30:49 +00:00
|
|
|
* The SMP parts are set up in pmap.c and machdep.c for the BSP, and
|
|
|
|
* pmap.c and mp_machdep.c set up the data for the APs to "see" when
|
|
|
|
* they awake. The reason for doing it via a struct is so that an
|
|
|
|
* array of pointers to each CPU's data can be set up for things like
|
|
|
|
* "check curproc on all other processors"
|
1998-04-06 15:37:21 +00:00
|
|
|
*/
|
2008-08-15 20:51:31 +00:00
|
|
|
|
2001-12-11 23:33:44 +00:00
|
|
|
/*
 * Machine-dependent members of struct pcpu.  The accessor macros in
 * this file locate pc_prvspace (and every other member) with
 * __offsetof(struct pcpu, ...), so the declaration order below fixes
 * the layout they depend on.
 *
 * NOTE(review): __pad presumably rounds struct pcpu up to a fixed
 * size and has to be recomputed whenever a member is added or
 * removed -- confirm against the size check for struct pcpu.
 */
#define	PCPU_MD_FIELDS							\
	char	pc_monitorbuf[128] __aligned(128); /* cache line */	\
	struct	pcpu *pc_prvspace;	/* Self-reference */		\
	struct	pmap *pc_curpmap;					\
	struct	segment_descriptor pc_common_tssd;			\
	struct	segment_descriptor *pc_tss_gdt;				\
	struct	segment_descriptor *pc_fsgs_gdt;			\
	struct	i386tss *pc_common_tssp;				\
	u_int	pc_kesp0;						\
	u_int	pc_trampstk;						\
	int	pc_currentldt;						\
	u_int	pc_acpi_id;		/* ACPI CPU id */		\
	u_int	pc_apic_id;						\
	int	pc_private_tss;		/* Flag indicating private tss */ \
	u_int	pc_cmci_mask;		/* MCx banks for CMCI */	\
	u_int	pc_vcpu_id;		/* Xen vCPU ID */		\
	struct	mtx pc_cmap_lock;					\
	void	*pc_cmap_pte1;						\
	void	*pc_cmap_pte2;						\
	caddr_t	pc_cmap_addr1;						\
	caddr_t	pc_cmap_addr2;						\
	vm_offset_t pc_qmap_addr;	/* KVA for temporary mappings */ \
	vm_offset_t pc_copyout_maddr;					\
	vm_offset_t pc_copyout_saddr;					\
	struct	mtx pc_copyout_mlock;					\
	struct	sx pc_copyout_slock;					\
	char	*pc_copyout_buf;					\
	vm_offset_t pc_pmap_eh_va;					\
	caddr_t	pc_pmap_eh_ptep;					\
	uint32_t pc_smp_tlb_done;	/* TLB op acknowledgement */	\
	uint32_t pc_ibpb_set;						\
	char	__pad[3610]
|
2008-08-19 19:53:52 +00:00
|
|
|
|
|
|
|
#ifdef _KERNEL
|
|
|
|
|
2017-11-23 11:40:16 +00:00
|
|
|
#if defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF)
|
2002-10-01 14:01:58 +00:00
|
|
|
|
2001-12-11 23:33:44 +00:00
|
|
|
/*
 * Byte offset of the per-cpu variable name within struct pcpu.
 */
#define	__pcpu_offset(name)	__offsetof(struct pcpu, name)
|
|
|
|
|
|
|
|
/*
 * Type of the per-cpu variable name.  The null struct pcpu pointer is
 * only an operand of __typeof; it is there to name the member.
 */
#define	__pcpu_type(name)	__typeof(((struct pcpu *)0)->name)
|
|
|
|
|
|
|
|
/*
 * Evaluates to the address of the per-cpu variable name.
 *
 * The pc_prvspace self-reference is loaded through %fs and the offset
 * of the requested member is added to it, so the result is a plain
 * pointer that the other macros dereference directly in C.
 */
#define	__PCPU_PTR(name) __extension__ ({				\
	__pcpu_type(name) *__ptr;					\
									\
	__asm __volatile("movl %%fs:%1,%0; addl %2,%0"			\
	    : "=r" (__ptr)						\
	    : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))),	\
	      "i" (__pcpu_offset(name)));				\
									\
	__ptr;								\
})
|
|
|
|
|
|
|
|
/*
 * Evaluates to the value of the per-cpu variable name.
 *
 * A variable of 1, 2 or 4 bytes is fetched by one %fs-relative mov
 * into a byte-array struct of the same size, which is then copied
 * over the result.  Any other size is read through __PCPU_PTR().
 */
#define	__PCPU_GET(name) __extension__ ({				\
	__pcpu_type(name) __value;					\
	struct __s {							\
		u_char	__b[MIN(sizeof(__value), 4)];			\
	} __raw;							\
									\
	if (sizeof(__value) != 1 && sizeof(__value) != 2 &&		\
	    sizeof(__value) != 4) {					\
		__value = *__PCPU_PTR(name);				\
	} else {							\
		__asm __volatile("mov %%fs:%1,%0"			\
		    : "=r" (__raw)					\
		    : "m" (*(struct __s *)(__pcpu_offset(name))));	\
		*(struct __s *)(void *)&__value = __raw;		\
	}								\
	__value;							\
})
|
|
|
|
|
2007-06-04 21:38:48 +00:00
|
|
|
/*
 * Adds val to the per-cpu counter name.  The implementation must be
 * atomic with respect to interrupts.
 *
 * A counter of 1, 2 or 4 bytes is updated by a single %fs-relative add
 * instruction.  Its destination is declared as a read-write ("+m")
 * operand because add consumes the old contents as well as storing the
 * new ones; a write-only ("=m") operand would tell the compiler that
 * the previous contents are dead.
 *
 * NOTE(review): a counter of any other size falls back to a C
 * read-modify-write through __PCPU_PTR(); confirm that no such counter
 * relies on the atomicity promised above.
 */
#define	__PCPU_ADD(name, val) do {					\
	__pcpu_type(name) __val;					\
	struct __s {							\
		u_char	__b[MIN(sizeof(__val), 4)];			\
	} __s;								\
									\
	__val = (val);							\
	if (sizeof(__val) == 1 || sizeof(__val) == 2 ||			\
	    sizeof(__val) == 4) {					\
		__s = *(struct __s *)(void *)&__val;			\
		__asm __volatile("add %1,%%fs:%0"			\
		    : "+m" (*(struct __s *)(__pcpu_offset(name)))	\
		    : "r" (__s));					\
	} else {							\
		*__PCPU_PTR(name) += __val;				\
	}								\
} while (0)
|
|
|
|
|
2007-03-11 05:54:29 +00:00
|
|
|
/*
 * Increments the per-cpu counter name by one.  The implementation must
 * be atomic with respect to interrupts.
 *
 * Only counters of 1, 2 or 4 bytes are accepted (enforced at compile
 * time by the CTASSERT); each size maps to one %fs-relative inc
 * instruction of the matching width.  The counter is listed both as
 * the output and as an input operand because inc reads and writes it.
 */
#define	__PCPU_INC(name) do {						\
	CTASSERT(sizeof(__pcpu_type(name)) == 1 ||			\
	    sizeof(__pcpu_type(name)) == 2 ||				\
	    sizeof(__pcpu_type(name)) == 4);				\
	if (sizeof(__pcpu_type(name)) == 1) {				\
		__asm __volatile("incb %%fs:%0"				\
		    : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
		    : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
	} else if (sizeof(__pcpu_type(name)) == 2) {			\
		__asm __volatile("incw %%fs:%0"				\
		    : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
		    : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
	} else if (sizeof(__pcpu_type(name)) == 4) {			\
		__asm __volatile("incl %%fs:%0"				\
		    : "=m" (*(__pcpu_type(name) *)(__pcpu_offset(name)))\
		    : "m" (*(__pcpu_type(name) *)(__pcpu_offset(name))));\
	}								\
} while (0)
|
|
|
|
|
2001-12-11 23:33:44 +00:00
|
|
|
/*
 * Stores val into the per-cpu variable name.
 *
 * val is first converted to the variable's own type.  A variable of
 * 1, 2 or 4 bytes is then written by one %fs-relative mov from a
 * byte-array struct of the same size; any other size is assigned
 * through __PCPU_PTR().
 */
#define	__PCPU_SET(name, val) do {					\
	__pcpu_type(name) __val;					\
	struct __s {							\
		u_char	__b[MIN(sizeof(__val), 4)];			\
	} __raw;							\
									\
	__val = (val);							\
	if (sizeof(__val) != 1 && sizeof(__val) != 2 &&			\
	    sizeof(__val) != 4) {					\
		*__PCPU_PTR(name) = __val;				\
	} else {							\
		__raw = *(struct __s *)(void *)&__val;			\
		__asm __volatile("mov %1,%%fs:%0"			\
		    : "=m" (*(struct __s *)(__pcpu_offset(name)))	\
		    : "r" (__raw));					\
	}								\
} while (0)
|
2001-12-11 23:33:44 +00:00
|
|
|
|
2017-01-29 16:54:55 +00:00
|
|
|
/*
 * Evaluates to a struct pcpu pointer obtained by loading the
 * pc_prvspace self-reference through %fs.
 */
#define	get_pcpu() __extension__ ({					\
	struct pcpu *__self;						\
									\
	__asm __volatile("movl %%fs:%1,%0"				\
	    : "=r" (__self)						\
	    : "m" (*(struct pcpu *)(__pcpu_offset(pc_prvspace))));	\
	__self;								\
})
|
|
|
|
|
2001-12-11 23:33:44 +00:00
|
|
|
/*
 * Public accessors.  Each one pastes the pc_ prefix onto the member
 * name and forwards to the matching __PCPU_*() implementation.
 */
#define	PCPU_GET(member)	__PCPU_GET(pc_ ## member)
#define	PCPU_ADD(member, val)	__PCPU_ADD(pc_ ## member, val)
#define	PCPU_INC(member)	__PCPU_INC(pc_ ## member)
#define	PCPU_PTR(member)	__PCPU_PTR(pc_ ## member)
#define	PCPU_SET(member, val)	__PCPU_SET(pc_ ## member, val)
|
1998-04-06 15:37:21 +00:00
|
|
|
|
2012-07-26 09:11:37 +00:00
|
|
|
#define OFFSETOF_CURTHREAD 0
|
|
|
|
#ifdef __clang__
|
|
|
|
#pragma clang diagnostic push
|
|
|
|
#pragma clang diagnostic ignored "-Wnull-dereference"
|
|
|
|
#endif
|
2010-07-29 18:44:10 +00:00
|
|
|
static __inline __pure2 struct thread *
|
2003-11-20 23:23:22 +00:00
|
|
|
__curthread(void)
|
|
|
|
{
|
|
|
|
struct thread *td;
|
|
|
|
|
2012-07-26 09:11:37 +00:00
|
|
|
__asm("movl %%fs:%1,%0" : "=r" (td)
|
|
|
|
: "m" (*(char *)OFFSETOF_CURTHREAD));
|
2003-11-20 23:23:22 +00:00
|
|
|
return (td);
|
|
|
|
}
|
2012-07-26 09:11:37 +00:00
|
|
|
#ifdef __clang__
|
|
|
|
#pragma clang diagnostic pop
|
|
|
|
#endif
|
2007-02-06 18:04:02 +00:00
|
|
|
#define curthread (__curthread())
|
2003-11-20 23:23:22 +00:00
|
|
|
|
2012-07-26 09:11:37 +00:00
|
|
|
#define OFFSETOF_CURPCB 16
|
|
|
|
static __inline __pure2 struct pcb *
|
|
|
|
__curpcb(void)
|
|
|
|
{
|
|
|
|
struct pcb *pcb;
|
|
|
|
|
|
|
|
__asm("movl %%fs:%1,%0" : "=r" (pcb) : "m" (*(char *)OFFSETOF_CURPCB));
|
|
|
|
return (pcb);
|
|
|
|
}
|
|
|
|
#define curpcb (__curpcb())
|
|
|
|
|
2018-04-25 07:51:41 +00:00
|
|
|
/*
 * True when the cpuid per-cpu variable reads as 0.
 * NOTE(review): presumably CPU 0 is always the bootstrap processor
 * (BSP) -- confirm.
 */
#define IS_BSP() (PCPU_GET(cpuid) == 0)
|
|
|
|
|
2017-11-23 11:40:16 +00:00
|
|
|
#else /* defined(__GNUCLIKE_ASM) && defined(__GNUCLIKE___TYPEOF) */
|
2007-02-06 18:04:02 +00:00
|
|
|
|
|
|
|
#error "this file needs to be ported to your compiler"
|
|
|
|
|
2017-11-23 11:40:16 +00:00
|
|
|
#endif /* __GNUCLIKE_ASM etc. */
|
2002-07-15 13:29:40 +00:00
|
|
|
|
2007-02-06 18:04:02 +00:00
|
|
|
#endif /* _KERNEL */
|
2001-08-16 09:29:35 +00:00
|
|
|
|
2007-02-06 18:04:02 +00:00
|
|
|
#endif /* !_MACHINE_PCPU_H_ */
|