Work-in-progress commit syncing up pmap cleanups that I have been working
on for a while:
- fine grained TLB shootdown for SMP on i386
- ranged TLB shootdowns, e.g. specify a range of pages to shoot down with
  a single IPI, since the IPI is very expensive.  Adjust some callers
  that used to trigger this inside tight loops to do a ranged shootdown
  at the end instead.
- PG_G support for SMP on i386 (options ENABLE_PG_G)
- defer PG_G activation till after we decide what we are going to do with
  PSE and the 4MB pages at the start of the kernel.  This should solve
  some rumored strangeness about stale PG_G entries getting stuck
  underneath the 4MB pages.
- add some instrumentation for the fine TLB shootdown
- convert some asm instruction wrappers from functions to inlines.  gcc
  seems to do a fair bit better with this.
- [temporarily!] pessimize the tlb shootdown IPI handlers.  I will fix
  this again shortly.

This has been working fairly well for me for a while, but I have tweaked
it again prior to commit since my last major testing round.  The only
outstanding problem that I know of is PG_G related, which is why there
is an option for it (not on by default for SMP).  I have seen world
speedups of a few percent (as much as 4 or 5% in one case), but I have
*not* accurately measured this - I am a bit sceptical of these numbers.
This commit is contained in:
Peter Wemm 2002-02-25 23:49:51 +00:00
parent 12ce63ed2a
commit 6bd95d70db
34 changed files with 2144 additions and 930 deletions

View File

@ -181,30 +181,108 @@ Xspuriousint:
iret iret
/* /*
* Handle TLB shootdowns. * Global address space TLB shootdown.
*/ */
.text .text
SUPERALIGN_TEXT SUPERALIGN_TEXT
.globl Xinvltlb .globl Xinvltlb
Xinvltlb: Xinvltlb:
pushl %eax pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS #ifdef COUNT_XINVLTLB_HITS
pushl %fs pushl %fs
movl $KPSEL, %eax movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs mov %ax, %fs
movl PCPU(CPUID), %eax movl PCPU(CPUID), %eax
popl %fs popl %fs
ss incl xhits_gbl(,%eax,4)
incl _xhits(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */ #endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */ movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3 movl %eax, %cr3
ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Single page TLB shootdown
*
* IPI handler: invalidates the TLB entry for the page whose address the
* initiating CPU stored in smp_tlb_addr1, signals End Of Interrupt to the
* local APIC, then increments smp_tlb_wait so the initiator (which spins
* on that counter) can see that this CPU is done.  Every register touched
* (%eax, %ds, and %fs when counting hits) is saved and restored.
*/
.text
SUPERALIGN_TEXT
.globl Xinvlpg
Xinvlpg:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
/* Instrumentation only: xhits_pg[cpuid]++, cpuid fetched through %fs. */
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
/* NOTE(review): %ds already holds KDSEL here, so the ss override below looks redundant -- confirm */
ss
incl xhits_pg(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %eax
invlpg (%eax) /* invalidate single page */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
/* Locked increment: other target CPUs acknowledge on the same counter. */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
 * Page range TLB shootdown.
 *
 * IPI handler: invalidates, one page at a time, every page in the range
 * [smp_tlb_addr1, smp_tlb_addr2) published by the initiating CPU, signals
 * End Of Interrupt to the local APIC, then increments smp_tlb_wait so the
 * initiator (which spins on that counter) can see that this CPU is done.
 *
 * The end address is exclusive: callers pass start + count * PAGE_SIZE.
 * The loop is a do/while, so the first page is always invalidated.
 *
 * Register use inside the loop:
 *	%edx = address of the page being invalidated
 *	%eax = end of the range
 * %eax, %edx and %ds (and %fs when counting hits) are saved on entry and
 * restored before the iret.
 */
	.text
	SUPERALIGN_TEXT
	.globl	Xinvlrng
Xinvlrng:
	pushl	%eax
	pushl	%edx
	pushl	%ds
	movl	$KDSEL, %eax		/* Kernel data selector */
	mov	%ax, %ds

#ifdef COUNT_XINVLTLB_HITS
	/* Instrumentation only: xhits_rng[cpuid]++ */
	pushl	%fs
	movl	$KPSEL, %eax		/* Private space selector */
	mov	%ax, %fs
	movl	PCPU(CPUID), %eax
	popl	%fs
	incl	xhits_rng(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */

	movl	smp_tlb_addr1, %edx	/* current page */
	movl	smp_tlb_addr2, %eax	/* end of range (exclusive) */
1:	invlpg	(%edx)			/* invalidate single page */
	addl	$PAGE_SIZE, %edx
	/*
	 * AT&T operand order: this sets flags from %edx - %eax, so jb is
	 * taken while current < end (unsigned).  The operands were
	 * previously swapped, which ended the loop after the first page.
	 */
	cmpl	%eax, %edx
	jb	1b

	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */

	/* Locked increment: other target CPUs acknowledge on the same counter. */
	lock
	incl	smp_tlb_wait

	popl	%ds
	popl	%edx
	popl	%eax
	iret
@ -443,12 +521,6 @@ Xrendezvous:
.data .data
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
_xhits:
.space (NCPU * 4), 0
#endif /* COUNT_XINVLTLB_HITS */
.globl apic_pin_trigger .globl apic_pin_trigger
apic_pin_trigger: apic_pin_trigger:
.long 0 .long 0

View File

@ -381,12 +381,6 @@ begin:
movl IdlePTD,%esi movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
movl %cr4, %eax
orl $CR4_PGE, %eax
movl %eax, %cr4
1:
pushl physfree /* value of first for init386(first) */ pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */ call init386 /* wire 386 chip for unix operation */
@ -809,14 +803,7 @@ no_kernend:
jne map_read_write jne map_read_write
#endif #endif
xorl %edx,%edx xorl %edx,%edx
movl $R(etext),%ecx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 2f
orl $PG_G,%edx
#endif
2: movl $R(etext),%ecx
addl $PAGE_MASK,%ecx addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx) fillkptphys(%edx)
@ -827,13 +814,7 @@ no_kernend:
andl $~PAGE_MASK, %eax andl $~PAGE_MASK, %eax
map_read_write: map_read_write:
movl $PG_RW,%edx movl $PG_RW,%edx
#if !defined(SMP) movl R(KERNend),%ecx
testl $CPUID_PGE, R(cpu_feature)
jz 1f
orl $PG_G,%edx
#endif
1: movl R(KERNend),%ecx
subl %eax,%ecx subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx) fillkptphys(%edx)

View File

@ -381,12 +381,6 @@ begin:
movl IdlePTD,%esi movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
movl %cr4, %eax
orl $CR4_PGE, %eax
movl %eax, %cr4
1:
pushl physfree /* value of first for init386(first) */ pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */ call init386 /* wire 386 chip for unix operation */
@ -809,14 +803,7 @@ no_kernend:
jne map_read_write jne map_read_write
#endif #endif
xorl %edx,%edx xorl %edx,%edx
movl $R(etext),%ecx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 2f
orl $PG_G,%edx
#endif
2: movl $R(etext),%ecx
addl $PAGE_MASK,%ecx addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx) fillkptphys(%edx)
@ -827,13 +814,7 @@ no_kernend:
andl $~PAGE_MASK, %eax andl $~PAGE_MASK, %eax
map_read_write: map_read_write:
movl $PG_RW,%edx movl $PG_RW,%edx
#if !defined(SMP) movl R(KERNend),%ecx
testl $CPUID_PGE, R(cpu_feature)
jz 1f
orl $PG_G,%edx
#endif
1: movl R(KERNend),%ecx
subl %eax,%ecx subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx) fillkptphys(%edx)

View File

@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU]; struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/*
 * Variables needed for SMP tlb shootdown.
 *
 * The initiating CPU fills these in (under smp_tlb_mtx) before sending
 * the shootdown IPI; the Xinvlpg/Xinvlrng handlers on the target CPUs
 * read the addresses and increment the wait counter when they are done.
 */
/* Page to invalidate (Xinvlpg) or start of the range (Xinvlrng). */
u_int smp_tlb_addr1;
/* End of the range for Xinvlrng; passed as 0 for the other vectors. */
u_int smp_tlb_addr2;
/* Zeroed by the initiator, incremented once by each responding CPU. */
volatile int smp_tlb_wait;
/* Spin mutex held for the whole request/acknowledge cycle. */
static struct mtx smp_tlb_mtx;
#endif
/* /*
* Local data and functions. * Local data and functions.
*/ */
@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK #ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN); mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */ #endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
#endif
} }
/* /*
@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */ /* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb, setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */ /* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock, setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */ return 0; /* return FAILURE */
} }
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
/*
 * Per-CPU counts of shootdown IPIs handled, indexed by cpu id and
 * incremented by the Xinvltlb (global), Xinvlpg (page) and Xinvlrng
 * (range) handlers.  Exported as opaque arrays under debug.xhits.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
/*
 * Counts of shootdowns requested through smp_invltlb(), smp_invlpg()
 * and smp_invlpg_range(); ipi_range_size accumulates the number of
 * pages covered by the ranged requests.
 * NOTE(review): these are u_int but exported with SYSCTL_INT -- confirm
 * that is intended.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
/* Same counters for the smp_masked_*() variants. */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/* /*
* Flush the TLB on all other CPU's * Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceding.
*/ */
/*
 * Send the TLB shootdown IPI 'vector' to every other CPU and spin until
 * all of them have acknowledged it.
 *
 * addr1/addr2 are published in smp_tlb_addr1/smp_tlb_addr2 for the
 * handler to read.  Returns immediately when there is no other CPU.
 *
 * Must be called with interrupts enabled; panics otherwise.
 * NOTE(review): presumably two CPUs spinning here with interrupts off
 * could never service each other's IPI -- confirm.
 */
static void
smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
{
	u_int ncpu;
	register_t eflags;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_tlb_shootdown with interrupts already disabled");
	/* The mutex keeps the shared request variables to one user at a time. */
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* must be reset before the IPI goes out */
	ipi_all_but_self(vector);
	/* Each handler increments smp_tlb_wait once when it has finished. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
/*
 * Send the TLB shootdown IPI 'vector' to the CPUs selected by 'mask' and
 * spin until all of them have acknowledged it.
 *
 * A mask of (u_int)-1 means "all other CPUs".  Otherwise the calling
 * CPU's own bit is removed from the mask and the remaining set bits are
 * the targets; an empty target set returns immediately.
 *
 * addr1/addr2 are published in smp_tlb_addr1/smp_tlb_addr2 for the
 * handler to read.  Must be called with interrupts enabled; panics
 * otherwise.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
{
	u_int m;
	int ncpu, othercpus;
	register_t eflags;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* XXX there should be a pcpu self mask */
		mask &= ~(1U << PCPU_GET(cpuid));
		if (mask == 0)
			return;
		/*
		 * Count the target cpus by clearing the lowest set bit each
		 * time round.  The previous "m >>= ffs(m)" form shifted by
		 * the full word width when only the top bit remained, which
		 * is undefined and does not terminate on x86.
		 */
		ncpu = 0;
		for (m = mask; m != 0; m &= m - 1)
			ncpu++;
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_targeted_tlb_shootdown with interrupts already disabled");
	/* The mutex keeps the shared request variables to one user at a time. */
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* must be reset before the IPI goes out */
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Each handler increments smp_tlb_wait once when it has finished. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void void
smp_invltlb(void) smp_invltlb(void)
{ {
#if defined(APIC_IO) #if defined(APIC_IO)
if (smp_started) if (smp_started) {
ipi_all_but_self(IPI_INVLTLB); smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */ #endif /* APIC_IO */
} }
void void
invlpg(u_int addr) smp_invlpg(u_int addr)
{ {
__asm __volatile("invlpg (%0)"::"r"(addr):"memory"); #if defined(APIC_IO)
if (smp_started) {
/* send a message to the other CPUs */ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_invltlb(); #ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
} }
void void
invltlb(void) smp_invlpg_range(u_int addr1, u_int addr2)
{ {
u_long temp; #if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/* void
* This should be implemented as load_cr3(rcr3()) when load_cr3() is smp_masked_invltlb(u_int mask)
* inlined. {
*/ #if defined(APIC_IO)
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */ void
smp_invltlb(); smp_masked_invlpg(u_int mask, u_int addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
} }
@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */ /* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) { if (smp_cpus == mp_ncpus) {
@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{ {
mtx_lock_spin(&sched_lock); mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
} }

View File

@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU]; struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/*
 * Variables needed for SMP tlb shootdown.
 *
 * The initiating CPU fills these in (under smp_tlb_mtx) before sending
 * the shootdown IPI; the Xinvlpg/Xinvlrng handlers on the target CPUs
 * read the addresses and increment the wait counter when they are done.
 */
/* Page to invalidate (Xinvlpg) or start of the range (Xinvlrng). */
u_int smp_tlb_addr1;
/* End of the range for Xinvlrng; passed as 0 for the other vectors. */
u_int smp_tlb_addr2;
/* Zeroed by the initiator, incremented once by each responding CPU. */
volatile int smp_tlb_wait;
/* Spin mutex held for the whole request/acknowledge cycle. */
static struct mtx smp_tlb_mtx;
#endif
/* /*
* Local data and functions. * Local data and functions.
*/ */
@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK #ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN); mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */ #endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
#endif
} }
/* /*
@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */ /* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb, setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */ /* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock, setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */ return 0; /* return FAILURE */
} }
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
/*
 * Per-CPU counts of shootdown IPIs handled, indexed by cpu id and
 * incremented by the Xinvltlb (global), Xinvlpg (page) and Xinvlrng
 * (range) handlers.  Exported as opaque arrays under debug.xhits.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
/*
 * Counts of shootdowns requested through smp_invltlb(), smp_invlpg()
 * and smp_invlpg_range(); ipi_range_size accumulates the number of
 * pages covered by the ranged requests.
 * NOTE(review): these are u_int but exported with SYSCTL_INT -- confirm
 * that is intended.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
/* Same counters for the smp_masked_*() variants. */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/* /*
* Flush the TLB on all other CPU's * Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceding.
*/ */
/*
 * Send the TLB shootdown IPI 'vector' to every other CPU and spin until
 * all of them have acknowledged it.
 *
 * addr1/addr2 are published in smp_tlb_addr1/smp_tlb_addr2 for the
 * handler to read.  Returns immediately when there is no other CPU.
 *
 * Must be called with interrupts enabled; panics otherwise.
 * NOTE(review): presumably two CPUs spinning here with interrupts off
 * could never service each other's IPI -- confirm.
 */
static void
smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
{
	u_int ncpu;
	register_t eflags;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_tlb_shootdown with interrupts already disabled");
	/* The mutex keeps the shared request variables to one user at a time. */
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* must be reset before the IPI goes out */
	ipi_all_but_self(vector);
	/* Each handler increments smp_tlb_wait once when it has finished. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
/*
 * Send the TLB shootdown IPI 'vector' to the CPUs selected by 'mask' and
 * spin until all of them have acknowledged it.
 *
 * A mask of (u_int)-1 means "all other CPUs".  Otherwise the calling
 * CPU's own bit is removed from the mask and the remaining set bits are
 * the targets; an empty target set returns immediately.
 *
 * addr1/addr2 are published in smp_tlb_addr1/smp_tlb_addr2 for the
 * handler to read.  Must be called with interrupts enabled; panics
 * otherwise.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
{
	u_int m;
	int ncpu, othercpus;
	register_t eflags;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* XXX there should be a pcpu self mask */
		mask &= ~(1U << PCPU_GET(cpuid));
		if (mask == 0)
			return;
		/*
		 * Count the target cpus by clearing the lowest set bit each
		 * time round.  The previous "m >>= ffs(m)" form shifted by
		 * the full word width when only the top bit remained, which
		 * is undefined and does not terminate on x86.
		 */
		ncpu = 0;
		for (m = mask; m != 0; m &= m - 1)
			ncpu++;
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_targeted_tlb_shootdown with interrupts already disabled");
	/* The mutex keeps the shared request variables to one user at a time. */
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* must be reset before the IPI goes out */
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Each handler increments smp_tlb_wait once when it has finished. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void void
smp_invltlb(void) smp_invltlb(void)
{ {
#if defined(APIC_IO) #if defined(APIC_IO)
if (smp_started) if (smp_started) {
ipi_all_but_self(IPI_INVLTLB); smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */ #endif /* APIC_IO */
} }
void void
invlpg(u_int addr) smp_invlpg(u_int addr)
{ {
__asm __volatile("invlpg (%0)"::"r"(addr):"memory"); #if defined(APIC_IO)
if (smp_started) {
/* send a message to the other CPUs */ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_invltlb(); #ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
} }
void void
invltlb(void) smp_invlpg_range(u_int addr1, u_int addr2)
{ {
u_long temp; #if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/* void
* This should be implemented as load_cr3(rcr3()) when load_cr3() is smp_masked_invltlb(u_int mask)
* inlined. {
*/ #if defined(APIC_IO)
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */ void
smp_invltlb(); smp_masked_invlpg(u_int mask, u_int addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
} }
@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */ /* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) { if (smp_cpus == mp_ncpus) {
@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{ {
mtx_lock_spin(&sched_lock); mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
} }

View File

@ -85,6 +85,9 @@
#include <sys/user.h> #include <sys/user.h>
#include <sys/vmmeter.h> #include <sys/vmmeter.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#if defined(SMP)
#include <sys/smp.h>
#endif
#include <vm/vm.h> #include <vm/vm.h>
#include <vm/vm_param.h> #include <vm/vm_param.h>
@ -101,7 +104,6 @@
#include <machine/md_var.h> #include <machine/md_var.h>
#include <machine/specialreg.h> #include <machine/specialreg.h>
#if defined(SMP) || defined(APIC_IO) #if defined(SMP) || defined(APIC_IO)
#include <machine/smp.h>
#include <machine/apic.h> #include <machine/apic.h>
#include <machine/segments.h> #include <machine/segments.h>
#include <machine/tss.h> #include <machine/tss.h>
@ -259,10 +261,10 @@ static vm_offset_t
pmap_kmem_choose(vm_offset_t addr) pmap_kmem_choose(vm_offset_t addr)
{ {
vm_offset_t newaddr = addr; vm_offset_t newaddr = addr;
#ifndef DISABLE_PSE #ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) { if (cpu_feature & CPUID_PSE)
newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
}
#endif #endif
return newaddr; return newaddr;
} }
@ -367,10 +369,9 @@ pmap_bootstrap(firstaddr, loadaddr)
PTD[i] = 0; PTD[i] = 0;
pgeflag = 0; pgeflag = 0;
#if !defined(SMP) /* XXX - see also mp_machdep.c */ #if !defined(SMP) || defined(ENABLE_PG_G)
if (cpu_feature & CPUID_PGE) { if (cpu_feature & CPUID_PGE)
pgeflag = PG_G; pgeflag = PG_G;
}
#endif #endif
/* /*
@ -383,7 +384,7 @@ pmap_bootstrap(firstaddr, loadaddr)
*/ */
pdir4mb = 0; pdir4mb = 0;
#if !defined(DISABLE_PSE) #ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) { if (cpu_feature & CPUID_PSE) {
pd_entry_t ptditmp; pd_entry_t ptditmp;
/* /*
@ -394,57 +395,64 @@ pmap_bootstrap(firstaddr, loadaddr)
ptditmp &= ~(NBPDR - 1); ptditmp &= ~(NBPDR - 1);
ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
pdir4mb = ptditmp; pdir4mb = ptditmp;
#if !defined(SMP)
/*
* Enable the PSE mode.
*/
load_cr4(rcr4() | CR4_PSE);
/*
* We can do the mapping here for the single processor
* case. We simply ignore the old page table page from
* now on.
*/
/*
* For SMP, we still need 4K pages to bootstrap APs,
* PSE will be enabled as soon as all APs are up.
*/
PTD[KPTDI] = (pd_entry_t) ptditmp;
kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
invltlb();
#endif
} }
#endif #endif
#ifndef SMP
/*
* Turn on PGE/PSE. SMP does this later on since the
* 4K page tables are required for AP boot (for now).
* XXX fixme.
*/
pmap_set_opt();
#endif
#ifdef SMP #ifdef SMP
if (cpu_apic_address == 0) if (cpu_apic_address == 0)
panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
/* local apic is mapped on last page */ /* local apic is mapped on last page */
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
(cpu_apic_address & PG_FRAME)); (cpu_apic_address & PG_FRAME));
#endif #endif
cpu_invltlb();
invltlb();
} }
#ifdef SMP
/* /*
* Set 4mb pdir for mp startup * Enable 4MB page mode for MP startup. Turn on PG_G support.
* BSP will run this after all the AP's have started up.
*/ */
void void
pmap_set_opt(void) pmap_set_opt(void)
{ {
if (pseflag && (cpu_feature & CPUID_PSE)) { pt_entry_t *pte;
vm_offset_t va;
if (pgeflag && (cpu_feature & CPUID_PGE))
load_cr4(rcr4() | CR4_PGE);
#ifndef DISABLE_PSE
if (pseflag && (cpu_feature & CPUID_PSE))
load_cr4(rcr4() | CR4_PSE); load_cr4(rcr4() | CR4_PSE);
if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
cpu_invltlb();
}
}
}
#endif #endif
if (PCPU_GET(cpuid) == 0) {
#ifndef DISABLE_PSE
if (pdir4mb)
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
#endif
if (pgeflag) {
/* XXX see earlier comments about virtual_avail */
for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE)
{
pte = vtopte(va);
if (*pte)
*pte |= pgeflag;
}
}
/*
* for SMP, this will cause all cpus to reload again, which
* is actually what we want since they now have CR4_PGE on.
*/
invltlb();
} else
cpu_invltlb();
}
/* /*
* Initialize the pmap module. * Initialize the pmap module.
@ -552,27 +560,37 @@ pmap_track_modified(vm_offset_t va)
return 0; return 0;
} }
static PMAP_INLINE void
invltlb_1pg(vm_offset_t va)
{
#ifdef I386_CPU
invltlb();
#else
invlpg(va);
#endif
}
static __inline void static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va) pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{ {
#if defined(SMP) #if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask)) u_int cpumask;
cpu_invlpg((void *)va); u_int other_cpus;
if (pmap->pm_active & PCPU_GET(other_cpus)) struct thread *td;
smp_invltlb();
td = curthread;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (td->td_critnest == 1)
cpu_critical_exit(td->td_savecrit);
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invlpg(va); /* global */
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
cpu_invlpg(va);
if (pmap->pm_active & other_cpus)
smp_masked_invlpg(pmap->pm_active & other_cpus, va);
}
critical_exit();
#else #else
if (pmap->pm_active) if (pmap->pm_active)
invltlb_1pg(va); cpu_invlpg(va);
#endif #endif
} }
@ -580,10 +598,30 @@ static __inline void
pmap_invalidate_all(pmap_t pmap) pmap_invalidate_all(pmap_t pmap)
{ {
#if defined(SMP) #if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask)) u_int cpumask;
cpu_invltlb(); u_int other_cpus;
if (pmap->pm_active & PCPU_GET(other_cpus)) struct thread *td;
smp_invltlb();
td = curthread;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (td->td_critnest == 1)
cpu_critical_exit(td->td_savecrit);
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invltlb(); /* global */
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
cpu_invltlb();
if (pmap->pm_active & other_cpus)
smp_masked_invltlb(pmap->pm_active & other_cpus);
}
critical_exit();
#else #else
if (pmap->pm_active) if (pmap->pm_active)
invltlb(); invltlb();
@ -609,12 +647,7 @@ get_ptbase(pmap)
/* otherwise, we are alternate address space */ /* otherwise, we are alternate address space */
if (frame != (APTDpde & PG_FRAME)) { if (frame != (APTDpde & PG_FRAME)) {
APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb(); invltlb();
#endif
} }
return APTmap; return APTmap;
} }
@ -643,7 +676,7 @@ pmap_pte_quick(pmap, va)
newpf = pde & PG_FRAME; newpf = pde & PG_FRAME;
if (((*PMAP1) & PG_FRAME) != newpf) { if (((*PMAP1) & PG_FRAME) != newpf) {
*PMAP1 = newpf | PG_RW | PG_V; *PMAP1 = newpf | PG_RW | PG_V;
invltlb_1pg((vm_offset_t) PADDR1); pmap_invalidate_page(pmap, (vm_offset_t) PADDR1);
} }
return PADDR1 + (index & (NPTEPG - 1)); return PADDR1 + (index & (NPTEPG - 1));
} }
@ -689,20 +722,17 @@ pmap_extract(pmap, va)
/* /*
* add a wired page to the kva * add a wired page to the kva
* note that in order for the mapping to take effect -- you
* should do a invltlb after doing the pmap_kenter...
*/ */
PMAP_INLINE void PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa) pmap_kenter(vm_offset_t va, vm_offset_t pa)
{ {
pt_entry_t *pte; pt_entry_t *pte;
pt_entry_t npte, opte; pt_entry_t npte;
npte = pa | PG_RW | PG_V | pgeflag; npte = pa | PG_RW | PG_V | pgeflag;
pte = vtopte(va); pte = vtopte(va);
opte = *pte;
*pte = npte; *pte = npte;
invltlb_1pg(va); invlpg(va);
} }
/* /*
@ -715,7 +745,7 @@ pmap_kremove(vm_offset_t va)
pte = vtopte(va); pte = vtopte(va);
*pte = 0; *pte = 0;
invltlb_1pg(va); invlpg(va);
} }
/* /*
@ -733,13 +763,17 @@ pmap_kremove(vm_offset_t va)
vm_offset_t vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{ {
vm_offset_t sva = *virt; vm_offset_t va, sva;
vm_offset_t va = sva; pt_entry_t *pte;
va = sva = *virt;
while (start < end) { while (start < end) {
pmap_kenter(va, start); pte = vtopte(va);
*pte = start | PG_RW | PG_V | pgeflag;
va += PAGE_SIZE; va += PAGE_SIZE;
start += PAGE_SIZE; start += PAGE_SIZE;
} }
invlpg_range(sva, end);
*virt = va; *virt = va;
return (sva); return (sva);
} }
@ -754,28 +788,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
* over. The page *must* be wired. * over. The page *must* be wired.
*/ */
void void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count) pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{ {
vm_offset_t end_va; vm_offset_t va, end_va;
pt_entry_t *pte;
va = sva;
end_va = va + count * PAGE_SIZE; end_va = va + count * PAGE_SIZE;
while (va < end_va) {
pt_entry_t *pte;
while (va < end_va) {
pte = vtopte(va); pte = vtopte(va);
*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va += PAGE_SIZE; va += PAGE_SIZE;
m++; m++;
} }
#ifdef SMP invlpg_range(sva, end_va);
smp_invltlb();
#endif
} }
/* /*
@ -783,27 +810,20 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
* kernel -- it is meant only for temporary mappings. * kernel -- it is meant only for temporary mappings.
*/ */
void void
pmap_qremove(vm_offset_t va, int count) pmap_qremove(vm_offset_t sva, int count)
{ {
vm_offset_t end_va; pt_entry_t *pte;
vm_offset_t va, end_va;
end_va = va + count*PAGE_SIZE; va = sva;
end_va = va + count * PAGE_SIZE;
while (va < end_va) { while (va < end_va) {
pt_entry_t *pte;
pte = vtopte(va); pte = vtopte(va);
*pte = 0; *pte = 0;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va += PAGE_SIZE; va += PAGE_SIZE;
} }
#ifdef SMP invlpg_range(sva, end_va);
smp_invltlb();
#endif
} }
static vm_page_t static vm_page_t
@ -824,9 +844,6 @@ pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
void void
pmap_new_proc(struct proc *p) pmap_new_proc(struct proc *p)
{ {
#ifdef I386_CPU
int updateneeded = 0;
#endif
int i; int i;
vm_object_t upobj; vm_object_t upobj;
vm_offset_t up; vm_offset_t up;
@ -870,23 +887,14 @@ pmap_new_proc(struct proc *p)
* Enter the page into the kernel address space. * Enter the page into the kernel address space.
*/ */
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
if (oldpte) { if (oldpte)
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(up + i * PAGE_SIZE); invlpg(up + i * PAGE_SIZE);
#endif
}
vm_page_wakeup(m); vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO); vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL; m->valid = VM_PAGE_BITS_ALL;
} }
#ifdef I386_CPU
if (updateneeded)
invltlb();
#endif
} }
/* /*
@ -901,7 +909,7 @@ pmap_dispose_proc(p)
vm_object_t upobj; vm_object_t upobj;
vm_offset_t up; vm_offset_t up;
vm_page_t m; vm_page_t m;
pt_entry_t *ptek, oldpte; pt_entry_t *ptek;
upobj = p->p_upages_obj; upobj = p->p_upages_obj;
up = (vm_offset_t)p->p_uarea; up = (vm_offset_t)p->p_uarea;
@ -911,17 +919,11 @@ pmap_dispose_proc(p)
if (m == NULL) if (m == NULL)
panic("pmap_dispose_proc: upage already missing?"); panic("pmap_dispose_proc: upage already missing?");
vm_page_busy(m); vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0; *(ptek + i) = 0;
#ifndef I386_CPU
invlpg(up + i * PAGE_SIZE); invlpg(up + i * PAGE_SIZE);
#endif
vm_page_unwire(m, 0); vm_page_unwire(m, 0);
vm_page_free(m); vm_page_free(m);
} }
#ifdef I386_CPU
invltlb();
#endif
} }
/* /*
@ -986,9 +988,6 @@ pmap_swapin_proc(p)
void void
pmap_new_thread(struct thread *td) pmap_new_thread(struct thread *td)
{ {
#ifdef I386_CPU
int updateneeded = 0;
#endif
int i; int i;
vm_object_t ksobj; vm_object_t ksobj;
vm_page_t m; vm_page_t m;
@ -1019,13 +1018,8 @@ pmap_new_thread(struct thread *td)
ptek = vtopte(ks - PAGE_SIZE); ptek = vtopte(ks - PAGE_SIZE);
oldpte = *ptek; oldpte = *ptek;
*ptek = 0; *ptek = 0;
if (oldpte) { if (oldpte)
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks - PAGE_SIZE); invlpg(ks - PAGE_SIZE);
#endif
}
ptek++; ptek++;
#else #else
/* get a kernel virtual address for the kstack for this thread */ /* get a kernel virtual address for the kstack for this thread */
@ -1055,23 +1049,14 @@ pmap_new_thread(struct thread *td)
* Enter the page into the kernel address space. * Enter the page into the kernel address space.
*/ */
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
if (oldpte) { if (oldpte)
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks + i * PAGE_SIZE); invlpg(ks + i * PAGE_SIZE);
#endif
}
vm_page_wakeup(m); vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO); vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL; m->valid = VM_PAGE_BITS_ALL;
} }
#ifdef I386_CPU
if (updateneeded)
invltlb();
#endif
} }
/* /*
@ -1086,7 +1071,7 @@ pmap_dispose_thread(td)
vm_object_t ksobj; vm_object_t ksobj;
vm_offset_t ks; vm_offset_t ks;
vm_page_t m; vm_page_t m;
pt_entry_t *ptek, oldpte; pt_entry_t *ptek;
ksobj = td->td_kstack_obj; ksobj = td->td_kstack_obj;
ks = td->td_kstack; ks = td->td_kstack;
@ -1096,17 +1081,11 @@ pmap_dispose_thread(td)
if (m == NULL) if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?"); panic("pmap_dispose_thread: kstack already missing?");
vm_page_busy(m); vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0; *(ptek + i) = 0;
#ifndef I386_CPU
invlpg(ks + i * PAGE_SIZE); invlpg(ks + i * PAGE_SIZE);
#endif
vm_page_unwire(m, 0); vm_page_unwire(m, 0);
vm_page_free(m); vm_page_free(m);
} }
#ifdef I386_CPU
invltlb();
#endif
} }
/* /*
@ -2207,13 +2186,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
if ((origpte & PG_RW) == 0) { if ((origpte & PG_RW) == 0) {
*pte |= PG_RW; *pte |= PG_RW;
#ifdef SMP pmap_invalidate_page(pmap, va);
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
} }
return; return;
} }
@ -2281,13 +2254,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((origpte & ~(PG_M|PG_A)) != newpte) { if ((origpte & ~(PG_M|PG_A)) != newpte) {
*pte = newpte | PG_A; *pte = newpte | PG_A;
/*if (origpte)*/ { /*if (origpte)*/ {
#ifdef SMP pmap_invalidate_page(pmap, va);
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
} }
} }
} }
@ -2710,7 +2677,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t pdnxt; vm_offset_t pdnxt;
pd_entry_t src_frame, dst_frame; pd_entry_t src_frame, dst_frame;
vm_page_t m; vm_page_t m;
pd_entry_t saved_pde;
if (dst_addr != src_addr) if (dst_addr != src_addr)
return; return;
@ -2720,17 +2686,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
return; return;
dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
if (dst_frame != (APTDpde & PG_FRAME)) { for (addr = src_addr; addr < end_addr; addr = pdnxt) {
APTDpde = dst_frame | PG_RW | PG_V;
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb();
#endif
}
saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V);
for(addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte; pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte; vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr; pd_entry_t srcptepaddr;
@ -2771,6 +2727,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pdnxt > end_addr) if (pdnxt > end_addr)
pdnxt = end_addr; pdnxt = end_addr;
/*
* Have to recheck this before every avtopte() call below
* in case we have blocked and something else used APTDpde.
*/
if (dst_frame != (APTDpde & PG_FRAME)) {
APTDpde = dst_frame | PG_RW | PG_V;
invltlb();
}
src_pte = vtopte(addr); src_pte = vtopte(addr);
dst_pte = avtopte(addr); dst_pte = avtopte(addr);
while (addr < pdnxt) { while (addr < pdnxt) {
@ -2786,16 +2750,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block. * block.
*/ */
dstmpte = pmap_allocpte(dst_pmap, addr); dstmpte = pmap_allocpte(dst_pmap, addr);
if ((APTDpde & PG_FRAME) !=
(saved_pde & PG_FRAME)) {
APTDpde = saved_pde;
printf ("IT HAPPENNED!");
#if defined(SMP)
cpu_invltlb();
#else
invltlb();
#endif
}
if ((*dst_pte == 0) && (ptetemp = *src_pte)) { if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/* /*
* Clear the modified and * Clear the modified and
@ -2839,12 +2793,15 @@ void
pmap_zero_page(vm_offset_t phys) pmap_zero_page(vm_offset_t phys)
{ {
#ifdef SMP
/* XXX overkill, we only want to disable migration here */
/* XXX or maybe not. down the track we have reentrancy issues */
critical_enter();
#endif
if (*CMAP2) if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy"); panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2); cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU) #if defined(I686_CPU)
if (cpu_class == CPUCLASS_686) if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2); i686_pagezero(CADDR2);
@ -2852,6 +2809,9 @@ pmap_zero_page(vm_offset_t phys)
#endif #endif
bzero(CADDR2, PAGE_SIZE); bzero(CADDR2, PAGE_SIZE);
*CMAP2 = 0; *CMAP2 = 0;
#ifdef SMP
critical_exit();
#endif
} }
/* /*
@ -2864,12 +2824,15 @@ void
pmap_zero_page_area(vm_offset_t phys, int off, int size) pmap_zero_page_area(vm_offset_t phys, int off, int size)
{ {
#ifdef SMP
/* XXX overkill, we only want to disable migration here */
/* XXX or maybe not. down the track we have reentrancy issues */
critical_enter();
#endif
if (*CMAP2) if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy"); panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2); cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU) #if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2); i686_pagezero(CADDR2);
@ -2877,6 +2840,9 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size)
#endif #endif
bzero((char *)CADDR2 + off, size); bzero((char *)CADDR2 + off, size);
*CMAP2 = 0; *CMAP2 = 0;
#ifdef SMP
critical_exit();
#endif
} }
/* /*
@ -2889,6 +2855,11 @@ void
pmap_copy_page(vm_offset_t src, vm_offset_t dst) pmap_copy_page(vm_offset_t src, vm_offset_t dst)
{ {
#ifdef SMP
/* XXX overkill, we only want to disable migration here */
/* XXX or maybe not. down the track we have reentrancy issues */
critical_enter();
#endif
if (*CMAP1) if (*CMAP1)
panic("pmap_copy_page: CMAP1 busy"); panic("pmap_copy_page: CMAP1 busy");
if (*CMAP2) if (*CMAP2)
@ -2896,17 +2867,14 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst)
*CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
*CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
#ifdef I386_CPU cpu_invlpg((u_int)CADDR1); /* SMP: local only */
invltlb(); cpu_invlpg((u_int)CADDR2); /* SMP: local only */
#else
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
#endif
bcopy(CADDR1, CADDR2, PAGE_SIZE); bcopy(CADDR1, CADDR2, PAGE_SIZE);
*CMAP1 = 0; *CMAP1 = 0;
*CMAP2 = 0; *CMAP2 = 0;
#ifdef SMP
critical_exit();
#endif
} }
@ -3322,14 +3290,13 @@ pmap_mapdev(pa, size)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
pa = pa & PG_FRAME; pa = pa & PG_FRAME;
for (tmpva = va; size > 0;) { for (tmpva = va; size > 0; ) {
pte = vtopte(tmpva); pte = vtopte(tmpva);
*pte = pa | PG_RW | PG_V | pgeflag; *pte = pa | PG_RW | PG_V | pgeflag;
size -= PAGE_SIZE; size -= PAGE_SIZE;
tmpva += PAGE_SIZE; tmpva += PAGE_SIZE;
pa += PAGE_SIZE;
} }
invltlb(); invlpg_range(va, tmpva);
return ((void *)(va + offset)); return ((void *)(va + offset));
} }
@ -3339,11 +3306,20 @@ pmap_unmapdev(va, size)
vm_offset_t va; vm_offset_t va;
vm_size_t size; vm_size_t size;
{ {
vm_offset_t base, offset; vm_offset_t base, offset, tmpva;
pt_entry_t *pte;
base = va & PG_FRAME; base = va & PG_FRAME;
offset = va & PAGE_MASK; offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE); size = roundup(offset + size, PAGE_SIZE);
for (tmpva = base; size > 0; ) {
pte = vtopte(tmpva);
*pte = 0;
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
}
invlpg_range(va, tmpva);
kmem_free(kernel_map, base, size); kmem_free(kernel_map, base, size);
} }

View File

@ -1591,42 +1591,6 @@ ENTRY(ssdtosd)
popl %ebx popl %ebx
ret ret
/* load_cr0(cr0) */
ENTRY(load_cr0)
movl 4(%esp),%eax
movl %eax,%cr0
ret
/* rcr0() */
ENTRY(rcr0)
movl %cr0,%eax
ret
/* rcr3() */
ENTRY(rcr3)
movl %cr3,%eax
ret
/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#ifdef SWTCH_OPTIM_STATS
incl tlb_flush_count
#endif
movl 4(%esp),%eax
movl %eax,%cr3
ret
/* rcr4() */
ENTRY(rcr4)
movl %cr4,%eax
ret
/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
movl 4(%esp),%eax
movl %eax,%cr4
ret
/* void reset_dbregs() */ /* void reset_dbregs() */
ENTRY(reset_dbregs) ENTRY(reset_dbregs)
movl $0,%eax movl $0,%eax

View File

@ -1591,42 +1591,6 @@ ENTRY(ssdtosd)
popl %ebx popl %ebx
ret ret
/* load_cr0(cr0) */
ENTRY(load_cr0)
movl 4(%esp),%eax
movl %eax,%cr0
ret
/* rcr0() */
ENTRY(rcr0)
movl %cr0,%eax
ret
/* rcr3() */
ENTRY(rcr3)
movl %cr3,%eax
ret
/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#ifdef SWTCH_OPTIM_STATS
incl tlb_flush_count
#endif
movl 4(%esp),%eax
movl %eax,%cr3
ret
/* rcr4() */
ENTRY(rcr4)
movl %cr4,%eax
ret
/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
movl 4(%esp),%eax
movl %eax,%cr4
ret
/* void reset_dbregs() */ /* void reset_dbregs() */
ENTRY(reset_dbregs) ENTRY(reset_dbregs)
movl $0,%eax movl $0,%eax

View File

@ -227,62 +227,6 @@ invd(void)
__asm __volatile("invd"); __asm __volatile("invd");
} }
#if defined(SMP) && defined(_KERNEL)
/*
* When using APIC IPI's, invlpg() is not simply the invlpg instruction
* (this is a bug) and the inlining cost is prohibitive since the call
* executes into the IPI transmission system.
*/
void invlpg __P((u_int addr));
void invltlb __P((void));
static __inline void
cpu_invlpg(void *addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
cpu_invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
#else /* !(SMP && _KERNEL) */
static __inline void
invlpg(u_int addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#ifdef SWTCH_OPTIM_STATS
++tlb_flush_count;
#endif
}
#endif /* SMP && _KERNEL */
static __inline u_short static __inline u_short
inw(u_int port) inw(u_int port)
{ {
@ -347,15 +291,6 @@ outw(u_int port, u_short data)
__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
} }
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline u_int static __inline u_int
read_eflags(void) read_eflags(void)
{ {
@ -420,6 +355,162 @@ wrmsr(u_int msr, u_int64_t newval)
__asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); __asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
} }
/*
 * Write 'data' into control register CR0.
 * Inline replacement for the old out-of-line assembly routine.
 */
static __inline void
load_cr0(u_int data)
{
__asm __volatile("movl %0,%%cr0" : : "r" (data));
}
/*
 * Read and return the current value of control register CR0.
 */
static __inline u_int
rcr0(void)
{
u_int data;
__asm __volatile("movl %%cr0,%0" : "=r" (data));
return (data);
}
/*
 * Read and return the current value of control register CR2
 * (on x86 this register holds the faulting linear address after a
 * page fault).
 */
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
/*
 * Write 'data' into control register CR3.
 *
 * The "memory" clobber makes the asm a compiler barrier, so memory
 * accesses are not moved across the CR3 reload.  When the kernel is
 * built with SWTCH_OPTIM_STATS, every call is also counted in
 * tlb_flush_count.
 */
static __inline void
load_cr3(u_int data)
{
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
/*
 * Read and return the current value of control register CR3.
 */
static __inline u_int
rcr3(void)
{
u_int data;
__asm __volatile("movl %%cr3,%0" : "=r" (data));
return (data);
}
/*
 * Write 'data' into control register CR4.
 */
static __inline void
load_cr4(u_int data)
{
__asm __volatile("movl %0,%%cr4" : : "r" (data));
}
/*
 * Read and return the current value of control register CR4.
 */
static __inline u_int
rcr4(void)
{
u_int data;
__asm __volatile("movl %%cr4,%0" : "=r" (data));
return (data);
}
/*
* Global TLB flush (except for thise for pages marked PG_G)
*/
static __inline void
cpu_invltlb(void)
{
load_cr3(rcr3());
}
/*
 * Flush the current CPU's TLB entry for the single page containing
 * 'addr' (even if the page has PG_G set).
 *
 * The invlpg instruction only exists on 486 and later CPUs.  Kernels
 * built with I386_CPU fall back to a full TLB flush instead (the i386
 * does not have PG_G, so nothing survives that flush).
 */
static __inline void
cpu_invlpg(u_int addr)
{
#ifdef I386_CPU
	cpu_invltlb();
#else
	__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
#endif
}
#ifdef PAGE_SIZE	/* Avoid this file depending on sys/param.h */
/*
 * Flush the current CPU's TLB entries for every page in the range
 * [startva, endva), stepping PAGE_SIZE at a time from startva.
 *
 * Each invlpg carries a "memory" clobber (as cpu_invlpg() does) so the
 * compiler cannot sink a preceding page table store below the flush
 * that is supposed to make it visible.  Kernels built with I386_CPU
 * have no invlpg instruction and flush the whole TLB instead.
 */
static __inline void
cpu_invlpg_range(u_int startva, u_int endva)
{
#ifndef I386_CPU
	u_int addr;

	for (addr = startva; addr < endva; addr += PAGE_SIZE)
		__asm __volatile("invlpg %0" : : "m" (*(char *)addr)
		    : "memory");
	/* Keep a compiler barrier even when the range is empty. */
	__asm __volatile("" : : : "memory");
#else
	cpu_invltlb();
#endif
}
#endif
#ifdef SMP
/*
 * Cross-CPU TLB shootdown entry points, implemented out of line.
 * The smp_masked_* variants take an extra 'mask' argument selecting
 * the target CPUs; the others address all other CPUs.
 */
extern void smp_invlpg(u_int addr);
extern void smp_masked_invlpg(u_int mask, u_int addr);
#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
/* Ranged variants: one request covers startva up to endva. */
extern void smp_invlpg_range(u_int startva, u_int endva);
extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva);
#endif
extern void smp_invltlb(void);
extern void smp_masked_invltlb(u_int mask);
#endif
/*
 * Generic single-page TLB flush.  Flushes the page containing 'addr'
 * on the current CPU and, on SMP kernels, asks the other CPUs to do
 * the same.
 */
static __inline void
invlpg(u_int addr)
{
	/* Local CPU first ... */
	cpu_invlpg(addr);
#if defined(SMP)
	/* ... then everybody else. */
	smp_invlpg(addr);
#endif
}
#ifdef PAGE_SIZE	/* Avoid this file depending on sys/param.h */
/*
 * Generic TLB flush for the pages from startva up to endva.  The local
 * CPU is flushed directly; on SMP kernels the other CPUs are then asked
 * to flush the same range with a single request, which saves many IPIs
 * compared with flushing page by page.
 */
static __inline void
invlpg_range(u_int startva, u_int endva)
{
	/* Local CPU first ... */
	cpu_invlpg_range(startva, endva);
#if defined(SMP)
	/* ... then everybody else. */
	smp_invlpg_range(startva, endva);
#endif
}
#endif
/*
 * Generic global TLB flush (except for those entries that belong to
 * pages marked PG_G).  Flushes the current CPU and, on SMP kernels,
 * asks the other CPUs to flush as well.
 */
static __inline void
invltlb(void)
{
	/* Local CPU first ... */
	cpu_invltlb();
#if defined(SMP)
	/* ... then everybody else. */
	smp_invltlb();
#endif
}
static __inline u_int static __inline u_int
rfs(void) rfs(void)
{ {
@ -581,6 +672,8 @@ cpu_critical_exit(critical_t eflags)
int breakpoint __P((void)); int breakpoint __P((void));
u_int bsfl __P((u_int mask)); u_int bsfl __P((u_int mask));
u_int bsrl __P((u_int mask)); u_int bsrl __P((u_int mask));
void cpu_invlpg __P((u_int addr));
void cpu_invlpg_range __P((u_int start, u_int end));
void disable_intr __P((void)); void disable_intr __P((void));
void do_cpuid __P((u_int ax, u_int *p)); void do_cpuid __P((u_int ax, u_int *p));
void enable_intr __P((void)); void enable_intr __P((void));
@ -591,15 +684,26 @@ void insl __P((u_int port, void *addr, size_t cnt));
void insw __P((u_int port, void *addr, size_t cnt)); void insw __P((u_int port, void *addr, size_t cnt));
void invd __P((void)); void invd __P((void));
void invlpg __P((u_int addr)); void invlpg __P((u_int addr));
void invlpg_range __P((u_int start, u_int end));
void invltlb __P((void)); void invltlb __P((void));
u_short inw __P((u_int port)); u_short inw __P((u_int port));
void load_cr0 __P((u_int cr0));
void load_cr3 __P((u_int cr3));
void load_cr4 __P((u_int cr4));
void load_fs __P((u_int sel));
void load_gs __P((u_int sel));
void outb __P((u_int port, u_char data)); void outb __P((u_int port, u_char data));
void outl __P((u_int port, u_int data)); void outl __P((u_int port, u_int data));
void outsb __P((u_int port, void *addr, size_t cnt)); void outsb __P((u_int port, void *addr, size_t cnt));
void outsl __P((u_int port, void *addr, size_t cnt)); void outsl __P((u_int port, void *addr, size_t cnt));
void outsw __P((u_int port, void *addr, size_t cnt)); void outsw __P((u_int port, void *addr, size_t cnt));
void outw __P((u_int port, u_short data)); void outw __P((u_int port, u_short data));
u_int rcr0 __P((void));
u_int rcr2 __P((void)); u_int rcr2 __P((void));
u_int rcr3 __P((void));
u_int rcr4 __P((void));
u_int rfs __P((void));
u_int rgs __P((void));
u_int64_t rdmsr __P((u_int msr)); u_int64_t rdmsr __P((u_int msr));
u_int64_t rdpmc __P((u_int pmc)); u_int64_t rdpmc __P((u_int pmc));
u_int64_t rdtsc __P((void)); u_int64_t rdtsc __P((void));
@ -607,22 +711,12 @@ u_int read_eflags __P((void));
void wbinvd __P((void)); void wbinvd __P((void));
void write_eflags __P((u_int ef)); void write_eflags __P((u_int ef));
void wrmsr __P((u_int msr, u_int64_t newval)); void wrmsr __P((u_int msr, u_int64_t newval));
u_int rfs __P((void));
u_int rgs __P((void));
void load_fs __P((u_int sel));
void load_gs __P((u_int sel));
critical_t cpu_critical_enter __P((void)); critical_t cpu_critical_enter __P((void));
void cpu_critical_exit __P((critical_t eflags)); void cpu_critical_exit __P((critical_t eflags));
#endif /* __GNUC__ */ #endif /* __GNUC__ */
void load_cr0 __P((u_int cr0));
void load_cr3 __P((u_int cr3));
void load_cr4 __P((u_int cr4));
void ltr __P((u_short sel)); void ltr __P((u_short sel));
u_int rcr0 __P((void));
u_int rcr3 __P((void));
u_int rcr4 __P((void));
void reset_dbregs __P((void)); void reset_dbregs __P((void));
__END_DECLS __END_DECLS

View File

@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU]; struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/*
 * Variables needed for SMP tlb shootdown.
 *
 * The initiating CPU fills in the two address words and zeroes the
 * counter before sending the IPI; the IPI handlers read the addresses
 * and bump the counter when done.
 */
u_int smp_tlb_addr1;		/* page to flush, or start of the range */
u_int smp_tlb_addr2;		/* end of the range (range flush only) */
volatile int smp_tlb_wait;	/* number of CPUs that have acknowledged */
static struct mtx smp_tlb_mtx;	/* held by the initiator while it waits */
#endif
/* /*
* Local data and functions. * Local data and functions.
*/ */
@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK #ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN); mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */ #endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
#endif
} }
/* /*
@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */ /* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb, setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */ /* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock, setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */ return 0; /* return FAILURE */
} }
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
/*
 * Optional instrumentation for the TLB shootdown code, exported under
 * the debug.xhits sysctl node.
 */

/*
 * Per-CPU counts of shootdown IPIs received, indexed by cpuid and
 * incremented by the Xinvltlb / Xinvlpg / Xinvlrng handlers.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
/*
 * Counts of shootdown requests issued to all other CPUs, by kind.
 * ipi_range_size accumulates the number of pages covered by the
 * ranged requests.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
/* The same counters for the masked (targeted) request variants. */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/* /*
* Flush the TLB on all other CPU's * Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceeding.
*/ */
/*
 * Send the TLB shootdown IPI 'vector' to every other CPU and spin until
 * all of them have acknowledged it.
 *
 * addr1 and addr2 are published in smp_tlb_addr1 / smp_tlb_addr2 for the
 * IPI handlers to read.  smp_tlb_mtx is held for the whole exchange so
 * only one shootdown uses those shared words at a time.  Returns
 * immediately when there is no other CPU.
 *
 * Panics if called with interrupts disabled.
 */
static void
smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
{
	u_int ncpu;
	register_t eflags;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;
	ipi_all_but_self(vector);
	/* Each handler increments smp_tlb_wait once it has flushed. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
/*
 * Send the TLB shootdown IPI 'vector' to a chosen set of other CPUs and
 * spin until all of them have acknowledged it.
 *
 * 'mask' has one bit per target cpuid; the value (u_int)-1 means "all
 * other CPUs".  The calling CPU's own bit is always removed.  addr1 and
 * addr2 are published in smp_tlb_addr1 / smp_tlb_addr2 for the IPI
 * handlers to read, under smp_tlb_mtx.  Returns immediately when no
 * target remains.
 *
 * Panics if called with interrupts disabled.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
{
	u_int m;
	int ncpu, othercpus;
	register_t eflags;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* XXX there should be a pcpu self mask */
		mask &= ~(1 << PCPU_GET(cpuid));
		if (mask == 0)
			return;
		/*
		 * Count the target cpus.  Clear the lowest set bit on each
		 * pass; shifting by the ffs() result would be a shift by
		 * the full word width when only the top bit is set.
		 */
		ncpu = 0;
		for (m = mask; m != 0; m &= m - 1)
			ncpu++;
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_targeted_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Each handler increments smp_tlb_wait once it has flushed. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void void
smp_invltlb(void) smp_invltlb(void)
{ {
#if defined(APIC_IO) #if defined(APIC_IO)
if (smp_started) if (smp_started) {
ipi_all_but_self(IPI_INVLTLB); smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */ #endif /* APIC_IO */
} }
void void
invlpg(u_int addr) smp_invlpg(u_int addr)
{ {
__asm __volatile("invlpg (%0)"::"r"(addr):"memory"); #if defined(APIC_IO)
if (smp_started) {
/* send a message to the other CPUs */ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_invltlb(); #ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
} }
void void
invltlb(void) smp_invlpg_range(u_int addr1, u_int addr2)
{ {
u_long temp; #if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/* void
* This should be implemented as load_cr3(rcr3()) when load_cr3() is smp_masked_invltlb(u_int mask)
* inlined. {
*/ #if defined(APIC_IO)
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */ void
smp_invltlb(); smp_masked_invlpg(u_int mask, u_int addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
} }
@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */ /* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) { if (smp_cpus == mp_ncpus) {
@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{ {
mtx_lock_spin(&sched_lock); mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
} }

View File

@ -267,9 +267,7 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t));
void pmap_unmapdev __P((vm_offset_t, vm_size_t)); void pmap_unmapdev __P((vm_offset_t, vm_size_t));
pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2; pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2;
vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t));
#ifdef SMP
void pmap_set_opt __P((void)); void pmap_set_opt __P((void));
#endif
#endif /* _KERNEL */ #endif /* _KERNEL */

View File

@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */
* Interprocessor interrupts for SMP. * Interprocessor interrupts for SMP.
*/ */
#define IPI_INVLTLB XINVLTLB_OFFSET #define IPI_INVLTLB XINVLTLB_OFFSET
#define IPI_INVLPG XINVLPG_OFFSET
#define IPI_INVLRNG XINVLRNG_OFFSET
#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET #define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET
#define IPI_AST XCPUAST_OFFSET #define IPI_AST XCPUAST_OFFSET
#define IPI_STOP XCPUSTOP_OFFSET #define IPI_STOP XCPUSTOP_OFFSET
@ -107,7 +109,6 @@ void assign_apic_irq __P((int apic, int intpin, int irq));
void revoke_apic_irq __P((int irq)); void revoke_apic_irq __P((int irq));
void bsp_apic_configure __P((void)); void bsp_apic_configure __P((void));
void init_secondary __P((void)); void init_secondary __P((void));
void smp_invltlb __P((void));
void forward_statclock __P((void)); void forward_statclock __P((void));
void forwarded_statclock __P((struct trapframe frame)); void forwarded_statclock __P((struct trapframe frame));
void forward_hardclock __P((void)); void forward_hardclock __P((void));

View File

@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
} }
else { else {
vector = TPR_SLOW_INTS + intr; vector = TPR_SLOW_INTS + intr;
#ifdef APIC_INTR_REORDER
#ifdef APIC_INTR_HIGHPRI_CLOCK
/* XXX: Hack (kludge?) for more accurate clock. */
if (intr == apic_8254_intr || intr == 8) {
vector = TPR_FAST_INTS + intr;
}
#endif
#endif
setidt(vector, slowintr[intr], setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
} }

View File

@ -88,6 +88,7 @@
/* IDT vector base for regular (aka. slow) and fast interrupts */ /* IDT vector base for regular (aka. slow) and fast interrupts */
#define TPR_SLOW_INTS 0x20 #define TPR_SLOW_INTS 0x20
#define TPR_FAST_INTS 0x60 #define TPR_FAST_INTS 0x60
/* XXX note that the AST interrupt is at 0x50 */
/* blocking values for local APIC Task Priority Register */ /* blocking values for local APIC Task Priority Register */
#define TPR_BLOCK_HWI 0x4f /* hardware INTs */ #define TPR_BLOCK_HWI 0x4f /* hardware INTs */
@ -104,20 +105,23 @@
#endif /** TEST_TEST1 */ #endif /** TEST_TEST1 */
/* TLB shootdowns */ /* TLB shootdowns */
#define XINVLTLB_OFFSET (ICU_OFFSET + 112) #define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */
#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */
#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */
/* inter-cpu clock handling */ /* inter-cpu clock handling */
#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113) #define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */
#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114) #define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */
/* inter-CPU rendezvous */ /* inter-CPU rendezvous */
#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115) #define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */
/* IPI to generate an additional software trap at the target CPU */ /* IPI to generate an additional software trap at the target CPU */
#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* XXX in the middle of the interrupt range, overlapping IRQ48 */
#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */
/* IPI to signal CPUs to stop and wait for another CPU to restart them */ /* IPI to signal CPUs to stop and wait for another CPU to restart them */
#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) #define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */
/* /*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@ -181,7 +185,9 @@ inthand_t
IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31); IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
inthand_t inthand_t
Xinvltlb, /* TLB shootdowns */ Xinvltlb, /* TLB shootdowns - global */
Xinvlpg, /* TLB shootdowns - 1 page */
Xinvlrng, /* TLB shootdowns - page range */
Xhardclock, /* Forward hardclock() */ Xhardclock, /* Forward hardclock() */
Xstatclock, /* Forward statclock() */ Xstatclock, /* Forward statclock() */
Xcpuast, /* Additional software trap on other cpu */ Xcpuast, /* Additional software trap on other cpu */

View File

@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
} }
else { else {
vector = TPR_SLOW_INTS + intr; vector = TPR_SLOW_INTS + intr;
#ifdef APIC_INTR_REORDER
#ifdef APIC_INTR_HIGHPRI_CLOCK
/* XXX: Hack (kludge?) for more accurate clock. */
if (intr == apic_8254_intr || intr == 8) {
vector = TPR_FAST_INTS + intr;
}
#endif
#endif
setidt(vector, slowintr[intr], setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
} }

View File

@ -5,6 +5,7 @@ DISABLE_PSE
MATH_EMULATE opt_math_emulate.h MATH_EMULATE opt_math_emulate.h
GPL_MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h
PMAP_SHPGPERPROC opt_pmap.h PMAP_SHPGPERPROC opt_pmap.h
ENABLE_PG_G opt_pmap.h
PPC_PROBE_CHIPSET opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h
PPC_DEBUG opt_ppc.h PPC_DEBUG opt_ppc.h
SHOW_BUSYBUFS SHOW_BUSYBUFS

View File

@ -5,6 +5,7 @@ DISABLE_PSE
MATH_EMULATE opt_math_emulate.h MATH_EMULATE opt_math_emulate.h
GPL_MATH_EMULATE opt_math_emulate.h GPL_MATH_EMULATE opt_math_emulate.h
PMAP_SHPGPERPROC opt_pmap.h PMAP_SHPGPERPROC opt_pmap.h
ENABLE_PG_G opt_pmap.h
PPC_PROBE_CHIPSET opt_ppc.h PPC_PROBE_CHIPSET opt_ppc.h
PPC_DEBUG opt_ppc.h PPC_DEBUG opt_ppc.h
SHOW_BUSYBUFS SHOW_BUSYBUFS

View File

@ -181,30 +181,108 @@ Xspuriousint:
iret iret
/* /*
* Handle TLB shootdowns. * Global address space TLB shootdown.
*/ */
.text .text
SUPERALIGN_TEXT SUPERALIGN_TEXT
.globl Xinvltlb .globl Xinvltlb
Xinvltlb: Xinvltlb:
pushl %eax pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS #ifdef COUNT_XINVLTLB_HITS
pushl %fs pushl %fs
movl $KPSEL, %eax movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs mov %ax, %fs
movl PCPU(CPUID), %eax movl PCPU(CPUID), %eax
popl %fs popl %fs
ss incl xhits_gbl(,%eax,4)
incl _xhits(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */ #endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */ movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3 movl %eax, %cr3
ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
 * Single page TLB shootdown
 *
 * IPI handler: invalidates the TLB entry for the page whose address the
 * initiating CPU stored in smp_tlb_addr1, signals End Of Interrupt to the
 * local APIC, then atomically bumps smp_tlb_wait so the initiator (which
 * spins on that counter) can see that this CPU has finished.
 *
 * Preserves every register it touches (%eax, %ds, and %fs when counting).
 */
	.text
	SUPERALIGN_TEXT
	.globl	Xinvlpg
Xinvlpg:
	pushl	%eax
	pushl	%ds
	movl	$KDSEL, %eax		/* Kernel data selector */
	mov	%ax, %ds
#ifdef COUNT_XINVLTLB_HITS
	/* Borrow %fs just long enough to read this CPU's id. */
	pushl	%fs
	movl	$KPSEL, %eax		/* Private space selector */
	mov	%ax, %fs
	movl	PCPU(CPUID), %eax
	popl	%fs
	/* %ds is already the kernel data selector; no segment override needed. */
	incl	xhits_pg(,%eax,4)	/* xhits_pg[cpuid]++ */
#endif /* COUNT_XINVLTLB_HITS */
	movl	smp_tlb_addr1, %eax	/* address published by the initiator */
	invlpg	(%eax)			/* invalidate single page */
	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */
	lock
	incl	smp_tlb_wait		/* tell the initiator we are done */
	popl	%ds
	popl	%eax
	iret
/*
 * Page range TLB shootdown.
 *
 * IPI handler: invalidates one TLB entry per page, starting at
 * smp_tlb_addr1 and stepping by PAGE_SIZE while the cursor is below
 * smp_tlb_addr2 (callers pass an exclusive end address).  The first page
 * is always invalidated.  Afterwards it signals End Of Interrupt to the
 * local APIC and atomically bumps smp_tlb_wait so the initiating CPU,
 * which spins on that counter, can see that this CPU has finished.
 */
	.text
	SUPERALIGN_TEXT
	.globl	Xinvlrng
Xinvlrng:
	pushl	%eax
	pushl	%edx
	pushl	%ds
	movl	$KDSEL, %eax		/* Kernel data selector */
	mov	%ax, %ds
#ifdef COUNT_XINVLTLB_HITS
	/* Borrow %fs just long enough to read this CPU's id. */
	pushl	%fs
	movl	$KPSEL, %eax		/* Private space selector */
	mov	%ax, %fs
	movl	PCPU(CPUID), %eax
	popl	%fs
	incl	xhits_rng(,%eax,4)	/* xhits_rng[cpuid]++ */
#endif /* COUNT_XINVLTLB_HITS */
	movl	smp_tlb_addr1, %edx	/* %edx = cursor, starts at range start */
	movl	smp_tlb_addr2, %eax	/* %eax = end of range */
1:	invlpg	(%edx)			/* invalidate single page */
	addl	$PAGE_SIZE, %edx
	/*
	 * AT&T operand order: this sets flags from (%edx - %eax), so jb
	 * loops while cursor < end (unsigned).  The operands were previously
	 * reversed, which ended a multi-page range after its first page.
	 */
	cmpl	%eax, %edx
	jb	1b
	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */
	lock
	incl	smp_tlb_wait		/* tell the initiator we are done */
	popl	%ds
	popl	%edx
popl %eax popl %eax
iret iret
@ -443,12 +521,6 @@ Xrendezvous:
.data .data
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
_xhits:
.space (NCPU * 4), 0
#endif /* COUNT_XINVLTLB_HITS */
.globl apic_pin_trigger .globl apic_pin_trigger
apic_pin_trigger: apic_pin_trigger:
.long 0 .long 0

View File

@ -381,12 +381,6 @@ begin:
movl IdlePTD,%esi movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax) movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
movl %cr4, %eax
orl $CR4_PGE, %eax
movl %eax, %cr4
1:
pushl physfree /* value of first for init386(first) */ pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */ call init386 /* wire 386 chip for unix operation */
@ -809,14 +803,7 @@ no_kernend:
jne map_read_write jne map_read_write
#endif #endif
xorl %edx,%edx xorl %edx,%edx
movl $R(etext),%ecx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 2f
orl $PG_G,%edx
#endif
2: movl $R(etext),%ecx
addl $PAGE_MASK,%ecx addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx) fillkptphys(%edx)
@ -827,13 +814,7 @@ no_kernend:
andl $~PAGE_MASK, %eax andl $~PAGE_MASK, %eax
map_read_write: map_read_write:
movl $PG_RW,%edx movl $PG_RW,%edx
#if !defined(SMP) movl R(KERNend),%ecx
testl $CPUID_PGE, R(cpu_feature)
jz 1f
orl $PG_G,%edx
#endif
1: movl R(KERNend),%ecx
subl %eax,%ecx subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx) fillkptphys(%edx)

View File

@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU]; struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
u_int smp_tlb_addr1;
u_int smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/* /*
* Local data and functions. * Local data and functions.
*/ */
@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK #ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN); mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */ #endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
#endif
} }
/* /*
@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */ /* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb, setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */ /* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock, setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */ return 0; /* return FAILURE */
} }
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
/*
 * Instrumentation for the TLB shootdown IPIs, exported under debug.xhits.
 *
 * xhits_gbl/xhits_pg/xhits_rng are indexed by cpu id and are incremented
 * by the Xinvltlb/Xinvlpg/Xinvlrng IPI handlers on the receiving cpu.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
/*
 * Sender-side counts of broadcast shootdowns, bumped by smp_invltlb(),
 * smp_invlpg() and smp_invlpg_range().  ipi_range_size accumulates
 * (addr2 - addr1) / PAGE_SIZE for each ranged request.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
/*
 * The same counts for the cpu-mask-targeted variants, bumped by
 * smp_masked_invltlb(), smp_masked_invlpg() and smp_masked_invlpg_range().
 */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/* /*
* Flush the TLB on all other CPU's * Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceding.
*/ */
/*
 * Broadcast a TLB shootdown IPI to every other cpu and wait for all of
 * them to acknowledge it.
 *
 *	vector	IPI vector to send (IPI_INVLTLB, IPI_INVLPG or IPI_INVLRNG).
 *	addr1	published in smp_tlb_addr1 for the handler (page / range start).
 *	addr2	published in smp_tlb_addr2 for the handler (range end).
 *
 * smp_tlb_mtx serializes use of the shared smp_tlb_* variables.  Each IPI
 * handler increments smp_tlb_wait when it has finished, so the loop below
 * ends once every other cpu has done its invalidation.
 */
static void
smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	/*
	 * NOTE(review): presumably required because a cpu spinning here with
	 * interrupts off could not service another cpu's shootdown IPI --
	 * confirm against the callers.
	 */
	if ((read_eflags() & PSL_I) == 0)
		panic("absolutely cannot call smp_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* reset the acknowledgement count */
	ipi_all_but_self(vector);
	/* Spin until every other cpu has acknowledged. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
/*
 * Send a TLB shootdown IPI to the cpus named in 'mask' (never to the
 * calling cpu) and wait for all of them to acknowledge it.
 *
 *	mask	bitmask of target cpus; (u_int)-1 means "all other cpus".
 *	vector	IPI vector to send (IPI_INVLTLB, IPI_INVLPG or IPI_INVLRNG).
 *	addr1	published in smp_tlb_addr1 for the handler (page / range start).
 *	addr2	published in smp_tlb_addr2 for the handler (range end).
 *
 * Returns without sending anything when there is no cpu left to target.
 * smp_tlb_mtx serializes use of the shared smp_tlb_* variables; each IPI
 * handler increments smp_tlb_wait when it has finished.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
{
	u_int m;
	int ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* XXX there should be a pcpu self mask */
		mask &= ~(1U << PCPU_GET(cpuid));
		if (mask == 0)
			return;
		/*
		 * Count the target cpus by clearing the lowest set bit each
		 * pass.  (Shifting by ffs(m) is undefined when it returns 32,
		 * i.e. when only bit 31 is set, and would never terminate on
		 * x86 where the shift count is taken modulo 32.)
		 */
		ncpu = 0;
		for (m = mask; m != 0; m &= m - 1)
			ncpu++;
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	/*
	 * NOTE(review): presumably required because a cpu spinning here with
	 * interrupts off could not service another cpu's shootdown IPI --
	 * confirm against the callers.
	 */
	if ((read_eflags() & PSL_I) == 0)
		panic("absolutely cannot call smp_targeted_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* reset the acknowledgement count */
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Spin until every targeted cpu has acknowledged. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void void
smp_invltlb(void) smp_invltlb(void)
{ {
#if defined(APIC_IO) #if defined(APIC_IO)
if (smp_started) if (smp_started) {
ipi_all_but_self(IPI_INVLTLB); smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */ #endif /* APIC_IO */
} }
void void
invlpg(u_int addr) smp_invlpg(u_int addr)
{ {
__asm __volatile("invlpg (%0)"::"r"(addr):"memory"); #if defined(APIC_IO)
if (smp_started) {
/* send a message to the other CPUs */ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_invltlb(); #ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
} }
void void
invltlb(void) smp_invlpg_range(u_int addr1, u_int addr2)
{ {
u_long temp; #if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/* void
* This should be implemented as load_cr3(rcr3()) when load_cr3() is smp_masked_invltlb(u_int mask)
* inlined. {
*/ #if defined(APIC_IO)
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */ void
smp_invltlb(); smp_masked_invlpg(u_int mask, u_int addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
} }
@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */ /* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) { if (smp_cpus == mp_ncpus) {
@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{ {
mtx_lock_spin(&sched_lock); mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
} }

View File

@ -101,9 +101,6 @@ apic_initialize(void)
#endif /** TEST_TEST1 */ #endif /** TEST_TEST1 */
lapic.svr = temp; lapic.svr = temp;
if (bootverbose)
apic_dump("apic_initialize()");
} }

View File

@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU]; struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
u_int smp_tlb_addr1;
u_int smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/* /*
* Local data and functions. * Local data and functions.
*/ */
@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK #ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN); mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */ #endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
#endif
} }
/* /*
@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */ /* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb, setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */ /* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock, setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */ return 0; /* return FAILURE */
} }
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
/*
 * Instrumentation for the TLB shootdown IPIs, exported under debug.xhits.
 *
 * xhits_gbl/xhits_pg/xhits_rng are indexed by cpu id and are incremented
 * by the Xinvltlb/Xinvlpg/Xinvlrng IPI handlers on the receiving cpu.
 */
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
/*
 * Sender-side counts of broadcast shootdowns, bumped by smp_invltlb(),
 * smp_invlpg() and smp_invlpg_range().  ipi_range_size accumulates
 * (addr2 - addr1) / PAGE_SIZE for each ranged request.
 */
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
/*
 * The same counts for the cpu-mask-targeted variants, bumped by
 * smp_masked_invltlb(), smp_masked_invlpg() and smp_masked_invlpg_range().
 */
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/* /*
* Flush the TLB on all other CPU's * Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceding.
*/ */
/*
 * Broadcast a TLB shootdown IPI to every other cpu and wait for all of
 * them to acknowledge it.
 *
 *	vector	IPI vector to send (IPI_INVLTLB, IPI_INVLPG or IPI_INVLRNG).
 *	addr1	published in smp_tlb_addr1 for the handler (page / range start).
 *	addr2	published in smp_tlb_addr2 for the handler (range end).
 *
 * smp_tlb_mtx serializes use of the shared smp_tlb_* variables.  Each IPI
 * handler increments smp_tlb_wait when it has finished, so the loop below
 * ends once every other cpu has done its invalidation.
 */
static void
smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
{
	u_int ncpu;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	/*
	 * NOTE(review): presumably required because a cpu spinning here with
	 * interrupts off could not service another cpu's shootdown IPI --
	 * confirm against the callers.
	 */
	if ((read_eflags() & PSL_I) == 0)
		panic("absolutely cannot call smp_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* reset the acknowledgement count */
	ipi_all_but_self(vector);
	/* Spin until every other cpu has acknowledged. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
/*
 * Send a TLB shootdown IPI to the cpus named in 'mask' (never to the
 * calling cpu) and wait for all of them to acknowledge it.
 *
 *	mask	bitmask of target cpus; (u_int)-1 means "all other cpus".
 *	vector	IPI vector to send (IPI_INVLTLB, IPI_INVLPG or IPI_INVLRNG).
 *	addr1	published in smp_tlb_addr1 for the handler (page / range start).
 *	addr2	published in smp_tlb_addr2 for the handler (range end).
 *
 * Returns without sending anything when there is no cpu left to target.
 * smp_tlb_mtx serializes use of the shared smp_tlb_* variables; each IPI
 * handler increments smp_tlb_wait when it has finished.
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
{
	u_int m;
	int ncpu, othercpus;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* XXX there should be a pcpu self mask */
		mask &= ~(1U << PCPU_GET(cpuid));
		if (mask == 0)
			return;
		/*
		 * Count the target cpus by clearing the lowest set bit each
		 * pass.  (Shifting by ffs(m) is undefined when it returns 32,
		 * i.e. when only bit 31 is set, and would never terminate on
		 * x86 where the shift count is taken modulo 32.)
		 */
		ncpu = 0;
		for (m = mask; m != 0; m &= m - 1)
			ncpu++;
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	/*
	 * NOTE(review): presumably required because a cpu spinning here with
	 * interrupts off could not service another cpu's shootdown IPI --
	 * confirm against the callers.
	 */
	if ((read_eflags() & PSL_I) == 0)
		panic("absolutely cannot call smp_targeted_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;	/* reset the acknowledgement count */
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Spin until every targeted cpu has acknowledged. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void void
smp_invltlb(void) smp_invltlb(void)
{ {
#if defined(APIC_IO) #if defined(APIC_IO)
if (smp_started) if (smp_started) {
ipi_all_but_self(IPI_INVLTLB); smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */ #endif /* APIC_IO */
} }
void void
invlpg(u_int addr) smp_invlpg(u_int addr)
{ {
__asm __volatile("invlpg (%0)"::"r"(addr):"memory"); #if defined(APIC_IO)
if (smp_started) {
/* send a message to the other CPUs */ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_invltlb(); #ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
} }
void void
invltlb(void) smp_invlpg_range(u_int addr1, u_int addr2)
{ {
u_long temp; #if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/* void
* This should be implemented as load_cr3(rcr3()) when load_cr3() is smp_masked_invltlb(u_int mask)
* inlined. {
*/ #if defined(APIC_IO)
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */ void
smp_invltlb(); smp_masked_invlpg(u_int mask, u_int addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
} }
@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */ /* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) { if (smp_cpus == mp_ncpus) {
@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{ {
mtx_lock_spin(&sched_lock); mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
} }

View File

@ -85,6 +85,9 @@
#include <sys/user.h> #include <sys/user.h>
#include <sys/vmmeter.h> #include <sys/vmmeter.h>
#include <sys/sysctl.h> #include <sys/sysctl.h>
#if defined(SMP)
#include <sys/smp.h>
#endif
#include <vm/vm.h> #include <vm/vm.h>
#include <vm/vm_param.h> #include <vm/vm_param.h>
@ -101,7 +104,6 @@
#include <machine/md_var.h> #include <machine/md_var.h>
#include <machine/specialreg.h> #include <machine/specialreg.h>
#if defined(SMP) || defined(APIC_IO) #if defined(SMP) || defined(APIC_IO)
#include <machine/smp.h>
#include <machine/apic.h> #include <machine/apic.h>
#include <machine/segments.h> #include <machine/segments.h>
#include <machine/tss.h> #include <machine/tss.h>
@ -259,10 +261,10 @@ static vm_offset_t
pmap_kmem_choose(vm_offset_t addr) pmap_kmem_choose(vm_offset_t addr)
{ {
vm_offset_t newaddr = addr; vm_offset_t newaddr = addr;
#ifndef DISABLE_PSE #ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) { if (cpu_feature & CPUID_PSE)
newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1); newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
}
#endif #endif
return newaddr; return newaddr;
} }
@ -367,10 +369,9 @@ pmap_bootstrap(firstaddr, loadaddr)
PTD[i] = 0; PTD[i] = 0;
pgeflag = 0; pgeflag = 0;
#if !defined(SMP) /* XXX - see also mp_machdep.c */ #if !defined(SMP) || defined(ENABLE_PG_G)
if (cpu_feature & CPUID_PGE) { if (cpu_feature & CPUID_PGE)
pgeflag = PG_G; pgeflag = PG_G;
}
#endif #endif
/* /*
@ -383,7 +384,7 @@ pmap_bootstrap(firstaddr, loadaddr)
*/ */
pdir4mb = 0; pdir4mb = 0;
#if !defined(DISABLE_PSE) #ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) { if (cpu_feature & CPUID_PSE) {
pd_entry_t ptditmp; pd_entry_t ptditmp;
/* /*
@ -394,57 +395,64 @@ pmap_bootstrap(firstaddr, loadaddr)
ptditmp &= ~(NBPDR - 1); ptditmp &= ~(NBPDR - 1);
ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag; ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
pdir4mb = ptditmp; pdir4mb = ptditmp;
#if !defined(SMP)
/*
* Enable the PSE mode.
*/
load_cr4(rcr4() | CR4_PSE);
/*
* We can do the mapping here for the single processor
* case. We simply ignore the old page table page from
* now on.
*/
/*
* For SMP, we still need 4K pages to bootstrap APs,
* PSE will be enabled as soon as all APs are up.
*/
PTD[KPTDI] = (pd_entry_t) ptditmp;
kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
invltlb();
#endif
} }
#endif #endif
#ifndef SMP
/*
* Turn on PGE/PSE. SMP does this later on since the
* 4K page tables are required for AP boot (for now).
* XXX fixme.
*/
pmap_set_opt();
#endif
#ifdef SMP #ifdef SMP
if (cpu_apic_address == 0) if (cpu_apic_address == 0)
panic("pmap_bootstrap: no local apic! (non-SMP hardware?)"); panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
/* local apic is mapped on last page */ /* local apic is mapped on last page */
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag | SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
(cpu_apic_address & PG_FRAME)); (cpu_apic_address & PG_FRAME));
#endif #endif
cpu_invltlb();
invltlb();
} }
#ifdef SMP
/* /*
* Set 4mb pdir for mp startup * Enable 4MB page mode for MP startup. Turn on PG_G support.
* BSP will run this after all the AP's have started up.
*/ */
void void
pmap_set_opt(void) pmap_set_opt(void)
{ {
if (pseflag && (cpu_feature & CPUID_PSE)) { pt_entry_t *pte;
vm_offset_t va;
if (pgeflag && (cpu_feature & CPUID_PGE))
load_cr4(rcr4() | CR4_PGE);
#ifndef DISABLE_PSE
if (pseflag && (cpu_feature & CPUID_PSE))
load_cr4(rcr4() | CR4_PSE); load_cr4(rcr4() | CR4_PSE);
if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
cpu_invltlb();
}
}
}
#endif #endif
if (PCPU_GET(cpuid) == 0) {
#ifndef DISABLE_PSE
if (pdir4mb)
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
#endif
if (pgeflag) {
/* XXX see earlier comments about virtual_avail */
for (va = KERNBASE; va < virtual_avail; va += PAGE_SIZE)
{
pte = vtopte(va);
if (*pte)
*pte |= pgeflag;
}
}
/*
* for SMP, this will cause all cpus to reload again, which
* is actually what we want since they now have CR4_PGE on.
*/
invltlb();
} else
cpu_invltlb();
}
/* /*
* Initialize the pmap module. * Initialize the pmap module.
@ -552,27 +560,37 @@ pmap_track_modified(vm_offset_t va)
return 0; return 0;
} }
static PMAP_INLINE void
invltlb_1pg(vm_offset_t va)
{
#ifdef I386_CPU
invltlb();
#else
invlpg(va);
#endif
}
static __inline void static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va) pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{ {
#if defined(SMP) #if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask)) u_int cpumask;
cpu_invlpg((void *)va); u_int other_cpus;
if (pmap->pm_active & PCPU_GET(other_cpus)) struct thread *td;
smp_invltlb();
td = curthread;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (td->td_critnest == 1)
cpu_critical_exit(td->td_savecrit);
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invlpg(va); /* global */
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
cpu_invlpg(va);
if (pmap->pm_active & other_cpus)
smp_masked_invlpg(pmap->pm_active & other_cpus, va);
}
critical_exit();
#else #else
if (pmap->pm_active) if (pmap->pm_active)
invltlb_1pg(va); cpu_invlpg(va);
#endif #endif
} }
@ -580,10 +598,30 @@ static __inline void
pmap_invalidate_all(pmap_t pmap) pmap_invalidate_all(pmap_t pmap)
{ {
#if defined(SMP) #if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask)) u_int cpumask;
cpu_invltlb(); u_int other_cpus;
if (pmap->pm_active & PCPU_GET(other_cpus)) struct thread *td;
smp_invltlb();
td = curthread;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (td->td_critnest == 1)
cpu_critical_exit(td->td_savecrit);
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invltlb(); /* global */
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
cpu_invltlb();
if (pmap->pm_active & other_cpus)
smp_masked_invltlb(pmap->pm_active & other_cpus);
}
critical_exit();
#else #else
if (pmap->pm_active) if (pmap->pm_active)
invltlb(); invltlb();
@ -609,12 +647,7 @@ get_ptbase(pmap)
/* otherwise, we are alternate address space */ /* otherwise, we are alternate address space */
if (frame != (APTDpde & PG_FRAME)) { if (frame != (APTDpde & PG_FRAME)) {
APTDpde = (pd_entry_t) (frame | PG_RW | PG_V); APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb(); invltlb();
#endif
} }
return APTmap; return APTmap;
} }
@ -643,7 +676,7 @@ pmap_pte_quick(pmap, va)
newpf = pde & PG_FRAME; newpf = pde & PG_FRAME;
if (((*PMAP1) & PG_FRAME) != newpf) { if (((*PMAP1) & PG_FRAME) != newpf) {
*PMAP1 = newpf | PG_RW | PG_V; *PMAP1 = newpf | PG_RW | PG_V;
invltlb_1pg((vm_offset_t) PADDR1); pmap_invalidate_page(pmap, (vm_offset_t) PADDR1);
} }
return PADDR1 + (index & (NPTEPG - 1)); return PADDR1 + (index & (NPTEPG - 1));
} }
@ -689,20 +722,17 @@ pmap_extract(pmap, va)
/* /*
* add a wired page to the kva * add a wired page to the kva
* note that in order for the mapping to take effect -- you
* should do a invltlb after doing the pmap_kenter...
*/ */
PMAP_INLINE void PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa) pmap_kenter(vm_offset_t va, vm_offset_t pa)
{ {
pt_entry_t *pte; pt_entry_t *pte;
pt_entry_t npte, opte; pt_entry_t npte;
npte = pa | PG_RW | PG_V | pgeflag; npte = pa | PG_RW | PG_V | pgeflag;
pte = vtopte(va); pte = vtopte(va);
opte = *pte;
*pte = npte; *pte = npte;
invltlb_1pg(va); invlpg(va);
} }
/* /*
@ -715,7 +745,7 @@ pmap_kremove(vm_offset_t va)
pte = vtopte(va); pte = vtopte(va);
*pte = 0; *pte = 0;
invltlb_1pg(va); invlpg(va);
} }
/* /*
@ -733,13 +763,17 @@ pmap_kremove(vm_offset_t va)
vm_offset_t vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{ {
vm_offset_t sva = *virt; vm_offset_t va, sva;
vm_offset_t va = sva; pt_entry_t *pte;
va = sva = *virt;
while (start < end) { while (start < end) {
pmap_kenter(va, start); pte = vtopte(va);
*pte = start | PG_RW | PG_V | pgeflag;
va += PAGE_SIZE; va += PAGE_SIZE;
start += PAGE_SIZE; start += PAGE_SIZE;
} }
invlpg_range(sva, end);
*virt = va; *virt = va;
return (sva); return (sva);
} }
@ -754,28 +788,21 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
* over. The page *must* be wired. * over. The page *must* be wired.
*/ */
void void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count) pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{ {
vm_offset_t end_va; vm_offset_t va, end_va;
pt_entry_t *pte;
va = sva;
end_va = va + count * PAGE_SIZE; end_va = va + count * PAGE_SIZE;
while (va < end_va) {
pt_entry_t *pte;
while (va < end_va) {
pte = vtopte(va); pte = vtopte(va);
*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag; *pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va += PAGE_SIZE; va += PAGE_SIZE;
m++; m++;
} }
#ifdef SMP invlpg_range(sva, end_va);
smp_invltlb();
#endif
} }
/* /*
@ -783,27 +810,20 @@ pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
* kernel -- it is meant only for temporary mappings. * kernel -- it is meant only for temporary mappings.
*/ */
void void
pmap_qremove(vm_offset_t va, int count) pmap_qremove(vm_offset_t sva, int count)
{ {
vm_offset_t end_va; pt_entry_t *pte;
vm_offset_t va, end_va;
end_va = va + count*PAGE_SIZE; va = sva;
end_va = va + count * PAGE_SIZE;
while (va < end_va) { while (va < end_va) {
pt_entry_t *pte;
pte = vtopte(va); pte = vtopte(va);
*pte = 0; *pte = 0;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va += PAGE_SIZE; va += PAGE_SIZE;
} }
#ifdef SMP invlpg_range(sva, end_va);
smp_invltlb();
#endif
} }
static vm_page_t static vm_page_t
@ -824,9 +844,6 @@ pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
void void
pmap_new_proc(struct proc *p) pmap_new_proc(struct proc *p)
{ {
#ifdef I386_CPU
int updateneeded = 0;
#endif
int i; int i;
vm_object_t upobj; vm_object_t upobj;
vm_offset_t up; vm_offset_t up;
@ -870,23 +887,14 @@ pmap_new_proc(struct proc *p)
* Enter the page into the kernel address space. * Enter the page into the kernel address space.
*/ */
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
if (oldpte) { if (oldpte)
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(up + i * PAGE_SIZE); invlpg(up + i * PAGE_SIZE);
#endif
}
vm_page_wakeup(m); vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO); vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL; m->valid = VM_PAGE_BITS_ALL;
} }
#ifdef I386_CPU
if (updateneeded)
invltlb();
#endif
} }
/* /*
@ -901,7 +909,7 @@ pmap_dispose_proc(p)
vm_object_t upobj; vm_object_t upobj;
vm_offset_t up; vm_offset_t up;
vm_page_t m; vm_page_t m;
pt_entry_t *ptek, oldpte; pt_entry_t *ptek;
upobj = p->p_upages_obj; upobj = p->p_upages_obj;
up = (vm_offset_t)p->p_uarea; up = (vm_offset_t)p->p_uarea;
@ -911,17 +919,11 @@ pmap_dispose_proc(p)
if (m == NULL) if (m == NULL)
panic("pmap_dispose_proc: upage already missing?"); panic("pmap_dispose_proc: upage already missing?");
vm_page_busy(m); vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0; *(ptek + i) = 0;
#ifndef I386_CPU
invlpg(up + i * PAGE_SIZE); invlpg(up + i * PAGE_SIZE);
#endif
vm_page_unwire(m, 0); vm_page_unwire(m, 0);
vm_page_free(m); vm_page_free(m);
} }
#ifdef I386_CPU
invltlb();
#endif
} }
/* /*
@ -986,9 +988,6 @@ pmap_swapin_proc(p)
void void
pmap_new_thread(struct thread *td) pmap_new_thread(struct thread *td)
{ {
#ifdef I386_CPU
int updateneeded = 0;
#endif
int i; int i;
vm_object_t ksobj; vm_object_t ksobj;
vm_page_t m; vm_page_t m;
@ -1019,13 +1018,8 @@ pmap_new_thread(struct thread *td)
ptek = vtopte(ks - PAGE_SIZE); ptek = vtopte(ks - PAGE_SIZE);
oldpte = *ptek; oldpte = *ptek;
*ptek = 0; *ptek = 0;
if (oldpte) { if (oldpte)
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks - PAGE_SIZE); invlpg(ks - PAGE_SIZE);
#endif
}
ptek++; ptek++;
#else #else
/* get a kernel virtual address for the kstack for this thread */ /* get a kernel virtual address for the kstack for this thread */
@ -1055,23 +1049,14 @@ pmap_new_thread(struct thread *td)
* Enter the page into the kernel address space. * Enter the page into the kernel address space.
*/ */
*(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag; *(ptek + i) = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
if (oldpte) { if (oldpte)
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks + i * PAGE_SIZE); invlpg(ks + i * PAGE_SIZE);
#endif
}
vm_page_wakeup(m); vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO); vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE); vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL; m->valid = VM_PAGE_BITS_ALL;
} }
#ifdef I386_CPU
if (updateneeded)
invltlb();
#endif
} }
/* /*
@ -1086,7 +1071,7 @@ pmap_dispose_thread(td)
vm_object_t ksobj; vm_object_t ksobj;
vm_offset_t ks; vm_offset_t ks;
vm_page_t m; vm_page_t m;
pt_entry_t *ptek, oldpte; pt_entry_t *ptek;
ksobj = td->td_kstack_obj; ksobj = td->td_kstack_obj;
ks = td->td_kstack; ks = td->td_kstack;
@ -1096,17 +1081,11 @@ pmap_dispose_thread(td)
if (m == NULL) if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?"); panic("pmap_dispose_thread: kstack already missing?");
vm_page_busy(m); vm_page_busy(m);
oldpte = *(ptek + i);
*(ptek + i) = 0; *(ptek + i) = 0;
#ifndef I386_CPU
invlpg(ks + i * PAGE_SIZE); invlpg(ks + i * PAGE_SIZE);
#endif
vm_page_unwire(m, 0); vm_page_unwire(m, 0);
vm_page_free(m); vm_page_free(m);
} }
#ifdef I386_CPU
invltlb();
#endif
} }
/* /*
@ -2207,13 +2186,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) { if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
if ((origpte & PG_RW) == 0) { if ((origpte & PG_RW) == 0) {
*pte |= PG_RW; *pte |= PG_RW;
#ifdef SMP pmap_invalidate_page(pmap, va);
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
} }
return; return;
} }
@ -2281,13 +2254,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((origpte & ~(PG_M|PG_A)) != newpte) { if ((origpte & ~(PG_M|PG_A)) != newpte) {
*pte = newpte | PG_A; *pte = newpte | PG_A;
/*if (origpte)*/ { /*if (origpte)*/ {
#ifdef SMP pmap_invalidate_page(pmap, va);
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
} }
} }
} }
@ -2710,7 +2677,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t pdnxt; vm_offset_t pdnxt;
pd_entry_t src_frame, dst_frame; pd_entry_t src_frame, dst_frame;
vm_page_t m; vm_page_t m;
pd_entry_t saved_pde;
if (dst_addr != src_addr) if (dst_addr != src_addr)
return; return;
@ -2720,17 +2686,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
return; return;
dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME; dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
if (dst_frame != (APTDpde & PG_FRAME)) { for (addr = src_addr; addr < end_addr; addr = pdnxt) {
APTDpde = dst_frame | PG_RW | PG_V;
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb();
#endif
}
saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V);
for(addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte; pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte; vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr; pd_entry_t srcptepaddr;
@ -2771,6 +2727,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pdnxt > end_addr) if (pdnxt > end_addr)
pdnxt = end_addr; pdnxt = end_addr;
/*
* Have to recheck this before every avtopte() call below
* in case we have blocked and something else used APTDpde.
*/
if (dst_frame != (APTDpde & PG_FRAME)) {
APTDpde = dst_frame | PG_RW | PG_V;
invltlb();
}
src_pte = vtopte(addr); src_pte = vtopte(addr);
dst_pte = avtopte(addr); dst_pte = avtopte(addr);
while (addr < pdnxt) { while (addr < pdnxt) {
@ -2786,16 +2750,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block. * block.
*/ */
dstmpte = pmap_allocpte(dst_pmap, addr); dstmpte = pmap_allocpte(dst_pmap, addr);
if ((APTDpde & PG_FRAME) !=
(saved_pde & PG_FRAME)) {
APTDpde = saved_pde;
printf ("IT HAPPENNED!");
#if defined(SMP)
cpu_invltlb();
#else
invltlb();
#endif
}
if ((*dst_pte == 0) && (ptetemp = *src_pte)) { if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/* /*
* Clear the modified and * Clear the modified and
@ -2839,12 +2793,15 @@ void
pmap_zero_page(vm_offset_t phys) pmap_zero_page(vm_offset_t phys)
{ {
#ifdef SMP
/* XXX overkill, we only want to disable migration here */
/* XXX or maybe not. down the track we have reentrancy issues */
critical_enter();
#endif
if (*CMAP2) if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy"); panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2); cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU) #if defined(I686_CPU)
if (cpu_class == CPUCLASS_686) if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2); i686_pagezero(CADDR2);
@ -2852,6 +2809,9 @@ pmap_zero_page(vm_offset_t phys)
#endif #endif
bzero(CADDR2, PAGE_SIZE); bzero(CADDR2, PAGE_SIZE);
*CMAP2 = 0; *CMAP2 = 0;
#ifdef SMP
critical_exit();
#endif
} }
/* /*
@ -2864,12 +2824,15 @@ void
pmap_zero_page_area(vm_offset_t phys, int off, int size) pmap_zero_page_area(vm_offset_t phys, int off, int size)
{ {
#ifdef SMP
/* XXX overkill, we only want to disable migration here */
/* XXX or maybe not. down the track we have reentrancy issues */
critical_enter();
#endif
if (*CMAP2) if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy"); panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M; *CMAP2 = PG_V | PG_RW | (phys & PG_FRAME) | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2); cpu_invlpg((vm_offset_t)CADDR2); /* SMP: local cpu only */
#if defined(I686_CPU) #if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE) if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2); i686_pagezero(CADDR2);
@ -2877,6 +2840,9 @@ pmap_zero_page_area(vm_offset_t phys, int off, int size)
#endif #endif
bzero((char *)CADDR2 + off, size); bzero((char *)CADDR2 + off, size);
*CMAP2 = 0; *CMAP2 = 0;
#ifdef SMP
critical_exit();
#endif
} }
/* /*
@ -2889,6 +2855,11 @@ void
pmap_copy_page(vm_offset_t src, vm_offset_t dst) pmap_copy_page(vm_offset_t src, vm_offset_t dst)
{ {
#ifdef SMP
/* XXX overkill, we only want to disable migration here */
/* XXX or maybe not. down the track we have reentrancy issues */
critical_enter();
#endif
if (*CMAP1) if (*CMAP1)
panic("pmap_copy_page: CMAP1 busy"); panic("pmap_copy_page: CMAP1 busy");
if (*CMAP2) if (*CMAP2)
@ -2896,17 +2867,14 @@ pmap_copy_page(vm_offset_t src, vm_offset_t dst)
*CMAP1 = PG_V | (src & PG_FRAME) | PG_A; *CMAP1 = PG_V | (src & PG_FRAME) | PG_A;
*CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M; *CMAP2 = PG_V | PG_RW | (dst & PG_FRAME) | PG_A | PG_M;
#ifdef I386_CPU cpu_invlpg((u_int)CADDR1); /* SMP: local only */
invltlb(); cpu_invlpg((u_int)CADDR2); /* SMP: local only */
#else
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
#endif
bcopy(CADDR1, CADDR2, PAGE_SIZE); bcopy(CADDR1, CADDR2, PAGE_SIZE);
*CMAP1 = 0; *CMAP1 = 0;
*CMAP2 = 0; *CMAP2 = 0;
#ifdef SMP
critical_exit();
#endif
} }
@ -3322,14 +3290,13 @@ pmap_mapdev(pa, size)
panic("pmap_mapdev: Couldn't alloc kernel virtual memory"); panic("pmap_mapdev: Couldn't alloc kernel virtual memory");
pa = pa & PG_FRAME; pa = pa & PG_FRAME;
for (tmpva = va; size > 0;) { for (tmpva = va; size > 0; ) {
pte = vtopte(tmpva); pte = vtopte(tmpva);
*pte = pa | PG_RW | PG_V | pgeflag; *pte = pa | PG_RW | PG_V | pgeflag;
size -= PAGE_SIZE; size -= PAGE_SIZE;
tmpva += PAGE_SIZE; tmpva += PAGE_SIZE;
pa += PAGE_SIZE;
} }
invltlb(); invlpg_range(va, tmpva);
return ((void *)(va + offset)); return ((void *)(va + offset));
} }
@ -3339,11 +3306,20 @@ pmap_unmapdev(va, size)
vm_offset_t va; vm_offset_t va;
vm_size_t size; vm_size_t size;
{ {
vm_offset_t base, offset; vm_offset_t base, offset, tmpva;
pt_entry_t *pte;
base = va & PG_FRAME; base = va & PG_FRAME;
offset = va & PAGE_MASK; offset = va & PAGE_MASK;
size = roundup(offset + size, PAGE_SIZE); size = roundup(offset + size, PAGE_SIZE);
for (tmpva = base; size > 0; ) {
pte = vtopte(tmpva);
*pte = 0;
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
}
invlpg_range(va, tmpva);
kmem_free(kernel_map, base, size); kmem_free(kernel_map, base, size);
} }

View File

@ -1591,42 +1591,6 @@ ENTRY(ssdtosd)
popl %ebx popl %ebx
ret ret
/* load_cr0(cr0) */
ENTRY(load_cr0)
movl 4(%esp),%eax
movl %eax,%cr0
ret
/* rcr0() */
ENTRY(rcr0)
movl %cr0,%eax
ret
/* rcr3() */
ENTRY(rcr3)
movl %cr3,%eax
ret
/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#ifdef SWTCH_OPTIM_STATS
incl tlb_flush_count
#endif
movl 4(%esp),%eax
movl %eax,%cr3
ret
/* rcr4() */
ENTRY(rcr4)
movl %cr4,%eax
ret
/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
movl 4(%esp),%eax
movl %eax,%cr4
ret
/* void reset_dbregs() */ /* void reset_dbregs() */
ENTRY(reset_dbregs) ENTRY(reset_dbregs)
movl $0,%eax movl $0,%eax

View File

@ -227,62 +227,6 @@ invd(void)
__asm __volatile("invd"); __asm __volatile("invd");
} }
#if defined(SMP) && defined(_KERNEL)
/*
* When using APIC IPI's, invlpg() is not simply the invlpg instruction
* (this is a bug) and the inlining cost is prohibitive since the call
* executes into the IPI transmission system.
*/
void invlpg __P((u_int addr));
void invltlb __P((void));
static __inline void
cpu_invlpg(void *addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
cpu_invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
#else /* !(SMP && _KERNEL) */
static __inline void
invlpg(u_int addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#ifdef SWTCH_OPTIM_STATS
++tlb_flush_count;
#endif
}
#endif /* SMP && _KERNEL */
static __inline u_short static __inline u_short
inw(u_int port) inw(u_int port)
{ {
@ -347,15 +291,6 @@ outw(u_int port, u_short data)
__asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port)); __asm __volatile("outw %0,%%dx" : : "a" (data), "d" (port));
} }
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline u_int static __inline u_int
read_eflags(void) read_eflags(void)
{ {
@ -420,6 +355,162 @@ wrmsr(u_int msr, u_int64_t newval)
__asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); __asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
} }
static __inline void
load_cr0(u_int data)
{
__asm __volatile("movl %0,%%cr0" : : "r" (data));
}
static __inline u_int
rcr0(void)
{
u_int data;
__asm __volatile("movl %%cr0,%0" : "=r" (data));
return (data);
}
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline void
load_cr3(u_int data)
{
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
static __inline u_int
rcr3(void)
{
u_int data;
__asm __volatile("movl %%cr3,%0" : "=r" (data));
return (data);
}
static __inline void
load_cr4(u_int data)
{
__asm __volatile("movl %0,%%cr4" : : "r" (data));
}
static __inline u_int
rcr4(void)
{
u_int data;
__asm __volatile("movl %%cr4,%0" : "=r" (data));
return (data);
}
/*
* Global TLB flush (except for those for pages marked PG_G)
*/
static __inline void
cpu_invltlb(void)
{
load_cr3(rcr3());
}
/*
* TLB flush for an individual page (even if it has PG_G).
* Only works on 486+ CPUs (i386 does not have PG_G).
*/
static __inline void
cpu_invlpg(u_int addr)
{
#ifndef I386_CPU
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
#else
cpu_invltlb();
#endif
}
#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
/*
 * Same as above but for a range of pages.
 *
 * Flushes the TLB entry for each page in [startva, endva) by stepping
 * PAGE_SIZE at a time; endva itself is not flushed.  Like cpu_invlpg(),
 * this touches only the CPU it runs on; invlpg_range() is the wrapper
 * that additionally calls smp_invlpg_range() for SMP kernels.
 */
static __inline void
cpu_invlpg_range(u_int startva, u_int endva)
{
#ifndef I386_CPU
u_int addr;
/*
 * The per-page invlpg statements carry no "memory" clobber; a single
 * empty asm with a "memory" clobber is issued once after the loop
 * instead of once per page.
 */
for (addr = startva; addr < endva; addr += PAGE_SIZE)
__asm __volatile("invlpg %0" : : "m" (*(char *)addr));
__asm __volatile("" : : : "memory");
#else
/* I386_CPU builds do not use invlpg: flush the whole TLB instead. */
cpu_invltlb();
#endif
}
#endif
#ifdef SMP
extern void smp_invlpg(u_int addr);
extern void smp_masked_invlpg(u_int mask, u_int addr);
#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
extern void smp_invlpg_range(u_int startva, u_int endva);
extern void smp_masked_invlpg_range(u_int mask, u_int startva, u_int endva);
#endif
extern void smp_invltlb(void);
extern void smp_masked_invltlb(u_int mask);
#endif
/*
* Generic page TLB flush. Takes care of SMP.
*/
static __inline void
invlpg(u_int addr)
{
cpu_invlpg(addr);
#ifdef SMP
smp_invlpg(addr);
#endif
}
#ifdef PAGE_SIZE /* Avoid this file depending on sys/param.h */
/*
* Generic TLB flush for a range of pages. Takes care of SMP.
* Saves many IPIs for SMP mode.
*/
static __inline void
invlpg_range(u_int startva, u_int endva)
{
cpu_invlpg_range(startva, endva);
#ifdef SMP
smp_invlpg_range(startva, endva);
#endif
}
#endif
/*
* Generic global TLB flush (except for those for pages marked PG_G)
*/
static __inline void
invltlb(void)
{
cpu_invltlb();
#ifdef SMP
smp_invltlb();
#endif
}
static __inline u_int static __inline u_int
rfs(void) rfs(void)
{ {
@ -581,6 +672,8 @@ cpu_critical_exit(critical_t eflags)
int breakpoint __P((void)); int breakpoint __P((void));
u_int bsfl __P((u_int mask)); u_int bsfl __P((u_int mask));
u_int bsrl __P((u_int mask)); u_int bsrl __P((u_int mask));
void cpu_invlpg __P((u_int addr));
void cpu_invlpg_range __P((u_int start, u_int end));
void disable_intr __P((void)); void disable_intr __P((void));
void do_cpuid __P((u_int ax, u_int *p)); void do_cpuid __P((u_int ax, u_int *p));
void enable_intr __P((void)); void enable_intr __P((void));
@ -591,15 +684,26 @@ void insl __P((u_int port, void *addr, size_t cnt));
void insw __P((u_int port, void *addr, size_t cnt)); void insw __P((u_int port, void *addr, size_t cnt));
void invd __P((void)); void invd __P((void));
void invlpg __P((u_int addr)); void invlpg __P((u_int addr));
void invlpg_range __P((u_int start, u_int end));
void invltlb __P((void)); void invltlb __P((void));
u_short inw __P((u_int port)); u_short inw __P((u_int port));
void load_cr0 __P((u_int cr0));
void load_cr3 __P((u_int cr3));
void load_cr4 __P((u_int cr4));
void load_fs __P((u_int sel));
void load_gs __P((u_int sel));
void outb __P((u_int port, u_char data)); void outb __P((u_int port, u_char data));
void outl __P((u_int port, u_int data)); void outl __P((u_int port, u_int data));
void outsb __P((u_int port, void *addr, size_t cnt)); void outsb __P((u_int port, void *addr, size_t cnt));
void outsl __P((u_int port, void *addr, size_t cnt)); void outsl __P((u_int port, void *addr, size_t cnt));
void outsw __P((u_int port, void *addr, size_t cnt)); void outsw __P((u_int port, void *addr, size_t cnt));
void outw __P((u_int port, u_short data)); void outw __P((u_int port, u_short data));
u_int rcr0 __P((void));
u_int rcr2 __P((void)); u_int rcr2 __P((void));
u_int rcr3 __P((void));
u_int rcr4 __P((void));
u_int rfs __P((void));
u_int rgs __P((void));
u_int64_t rdmsr __P((u_int msr)); u_int64_t rdmsr __P((u_int msr));
u_int64_t rdpmc __P((u_int pmc)); u_int64_t rdpmc __P((u_int pmc));
u_int64_t rdtsc __P((void)); u_int64_t rdtsc __P((void));
@ -607,22 +711,12 @@ u_int read_eflags __P((void));
void wbinvd __P((void)); void wbinvd __P((void));
void write_eflags __P((u_int ef)); void write_eflags __P((u_int ef));
void wrmsr __P((u_int msr, u_int64_t newval)); void wrmsr __P((u_int msr, u_int64_t newval));
u_int rfs __P((void));
u_int rgs __P((void));
void load_fs __P((u_int sel));
void load_gs __P((u_int sel));
critical_t cpu_critical_enter __P((void)); critical_t cpu_critical_enter __P((void));
void cpu_critical_exit __P((critical_t eflags)); void cpu_critical_exit __P((critical_t eflags));
#endif /* __GNUC__ */ #endif /* __GNUC__ */
void load_cr0 __P((u_int cr0));
void load_cr3 __P((u_int cr3));
void load_cr4 __P((u_int cr4));
void ltr __P((u_short sel)); void ltr __P((u_short sel));
u_int rcr0 __P((void));
u_int rcr3 __P((void));
u_int rcr4 __P((void));
void reset_dbregs __P((void)); void reset_dbregs __P((void));
__END_DECLS __END_DECLS

View File

@ -287,6 +287,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU]; struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
u_int smp_tlb_addr1;
u_int smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/* /*
* Local data and functions. * Local data and functions.
*/ */
@ -335,6 +343,9 @@ init_locks(void)
#ifdef USE_COMLOCK #ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", MTX_SPIN); mtx_init(&com_mtx, "com", MTX_SPIN);
#endif /* USE_COMLOCK */ #endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", MTX_SPIN);
#endif
} }
/* /*
@ -604,6 +615,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */ /* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb, setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */ /* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock, setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2186,42 +2201,198 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */ return 0; /* return FAILURE */
} }
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/* /*
* Flush the TLB on all other CPU's * Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceding.
*/ */
/*
 * Send the TLB shootdown IPI 'vector' to every other CPU and wait until
 * all of them have acknowledged it.
 *
 * addr1/addr2 are published in smp_tlb_addr1/smp_tlb_addr2 for the IPI
 * handlers to read.  Each handler increments smp_tlb_wait when it is
 * done, and this function spins until the count reaches the number of
 * other CPUs.  smp_tlb_mtx serializes initiators so that only one
 * shootdown uses the shared variables at a time.
 *
 * Must be called with interrupts enabled (panics otherwise).
 * NOTE(review): presumably because a CPU spinning here with interrupts
 * off could not service a peer's shootdown IPI and both would deadlock;
 * confirm.
 */
static void
smp_tlb_shootdown(u_int vector, u_int addr1, u_int addr2)
{
	u_int ncpu;
	register_t eflags;

	ncpu = mp_ncpus - 1;	/* does not shootdown self */
	if (ncpu < 1)
		return;		/* no other cpus */
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	/* Publish the arguments and reset the ack counter before the IPI. */
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;
	ipi_all_but_self(vector);
	/* Wait for every other CPU to bump smp_tlb_wait. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
/*
 * Send the TLB shootdown IPI 'vector' to the CPUs selected by 'mask'
 * and wait until all of them have acknowledged it.
 *
 * A mask of (u_int)-1 means "all other CPUs" and is sent with
 * ipi_all_but_self(); any other mask has the calling CPU's bit removed
 * and is sent with ipi_selected().  Returns without sending anything
 * when there is no target left.
 *
 * addr1/addr2 are published in smp_tlb_addr1/smp_tlb_addr2 for the IPI
 * handlers; each handler increments smp_tlb_wait when done and this
 * function spins until the count reaches the number of targets.
 *
 * Must be called with interrupts enabled (panics otherwise).
 */
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, u_int addr1, u_int addr2)
{
	u_int m;
	int ncpu, othercpus;
	register_t eflags;

	othercpus = mp_ncpus - 1;
	if (mask == (u_int)-1) {
		ncpu = othercpus;
		if (ncpu < 1)
			return;
	} else {
		/* XXX there should be a pcpu self mask */
		mask &= ~(1 << PCPU_GET(cpuid));
		if (mask == 0)
			return;
		/*
		 * Count the target cpus.  Clear the lowest set bit on each
		 * pass; shifting by the ffs() result would be a shift by 32
		 * (undefined, and a no-op on x86) once only bit 31 is left.
		 */
		ncpu = 0;
		for (m = mask; m != 0; m &= m - 1)
			ncpu++;
		if (ncpu > othercpus) {
			/* XXX this should be a panic offence */
			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
			    ncpu, othercpus);
			ncpu = othercpus;
		}
		/* XXX should be a panic, implied by mask == 0 above */
		if (ncpu < 1)
			return;
	}
	eflags = read_eflags();
	if ((eflags & PSL_I) == 0)
		panic("absolutely cannot call smp_targeted_tlb_shootdown with interrupts already disabled");
	mtx_lock_spin(&smp_tlb_mtx);
	/* Publish the arguments and reset the ack counter before the IPI. */
	smp_tlb_addr1 = addr1;
	smp_tlb_addr2 = addr2;
	smp_tlb_wait = 0;
	if (mask == (u_int)-1)
		ipi_all_but_self(vector);
	else
		ipi_selected(mask, vector);
	/* Wait for every targeted CPU to bump smp_tlb_wait. */
	while (atomic_load_acq_int(&smp_tlb_wait) < ncpu)
		/* XXX cpu_pause() */ ;
	mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void void
smp_invltlb(void) smp_invltlb(void)
{ {
#if defined(APIC_IO) #if defined(APIC_IO)
if (smp_started) if (smp_started) {
ipi_all_but_self(IPI_INVLTLB); smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */ #endif /* APIC_IO */
} }
void void
invlpg(u_int addr) smp_invlpg(u_int addr)
{ {
__asm __volatile("invlpg (%0)"::"r"(addr):"memory"); #if defined(APIC_IO)
if (smp_started) {
/* send a message to the other CPUs */ smp_tlb_shootdown(IPI_INVLPG, addr, 0);
smp_invltlb(); #ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
} }
void void
invltlb(void) smp_invlpg_range(u_int addr1, u_int addr2)
{ {
u_long temp; #if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/* void
* This should be implemented as load_cr3(rcr3()) when load_cr3() is smp_masked_invltlb(u_int mask)
* inlined. {
*/ #if defined(APIC_IO)
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory"); if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */ void
smp_invltlb(); smp_masked_invlpg(u_int mask, u_int addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, u_int addr1, u_int addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
} }
@ -2280,6 +2451,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */ /* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) { if (smp_cpus == mp_ncpus) {
@ -2312,7 +2486,8 @@ forwarded_statclock(struct trapframe frame)
{ {
mtx_lock_spin(&sched_lock); mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame)); statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock); mtx_unlock_spin(&sched_lock);
} }

View File

@ -267,9 +267,7 @@ void *pmap_mapdev __P((vm_offset_t, vm_size_t));
void pmap_unmapdev __P((vm_offset_t, vm_size_t)); void pmap_unmapdev __P((vm_offset_t, vm_size_t));
pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2; pt_entry_t *pmap_pte __P((pmap_t, vm_offset_t)) __pure2;
vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t)); vm_page_t pmap_use_pt __P((pmap_t, vm_offset_t));
#ifdef SMP
void pmap_set_opt __P((void)); void pmap_set_opt __P((void));
#endif
#endif /* _KERNEL */ #endif /* _KERNEL */

View File

@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */
* Interprocessor interrupts for SMP. * Interprocessor interrupts for SMP.
*/ */
#define IPI_INVLTLB XINVLTLB_OFFSET #define IPI_INVLTLB XINVLTLB_OFFSET
#define IPI_INVLPG XINVLPG_OFFSET
#define IPI_INVLRNG XINVLRNG_OFFSET
#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET #define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET
#define IPI_AST XCPUAST_OFFSET #define IPI_AST XCPUAST_OFFSET
#define IPI_STOP XCPUSTOP_OFFSET #define IPI_STOP XCPUSTOP_OFFSET
@ -107,7 +109,6 @@ void assign_apic_irq __P((int apic, int intpin, int irq));
void revoke_apic_irq __P((int irq)); void revoke_apic_irq __P((int irq));
void bsp_apic_configure __P((void)); void bsp_apic_configure __P((void));
void init_secondary __P((void)); void init_secondary __P((void));
void smp_invltlb __P((void));
void forward_statclock __P((void)); void forward_statclock __P((void));
void forwarded_statclock __P((struct trapframe frame)); void forwarded_statclock __P((struct trapframe frame));
void forward_hardclock __P((void)); void forward_hardclock __P((void));

View File

@ -90,13 +90,6 @@
*/ */
#define APIC_INTR_REORDER #define APIC_INTR_REORDER
/*
* Redirect clock interrupts to a higher priority (fast intr) vector,
* while still using the slow interrupt handler. Only effective when
* APIC_INTR_REORDER is defined.
*/
#define APIC_INTR_HIGHPRI_CLOCK
#endif /* APIC_IO */ #endif /* APIC_IO */
/* /*

View File

@ -181,30 +181,108 @@ Xspuriousint:
iret iret
/* /*
* Handle TLB shootdowns. * Global address space TLB shootdown.
*/ */
.text .text
SUPERALIGN_TEXT SUPERALIGN_TEXT
.globl Xinvltlb .globl Xinvltlb
Xinvltlb: Xinvltlb:
pushl %eax pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS #ifdef COUNT_XINVLTLB_HITS
pushl %fs pushl %fs
movl $KPSEL, %eax movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs mov %ax, %fs
movl PCPU(CPUID), %eax movl PCPU(CPUID), %eax
popl %fs popl %fs
ss incl xhits_gbl(,%eax,4)
incl _xhits(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */ #endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */ movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3 movl %eax, %cr3
ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */ movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
 * Single page TLB shootdown
 *
 * IPI handler installed at XINVLPG_OFFSET.  Invalidates the TLB entry
 * for the page whose address the initiating CPU stored in smp_tlb_addr1,
 * signals End Of Interrupt to the local APIC, then atomically increments
 * smp_tlb_wait, which the initiator polls to learn that this CPU is done.
 *
 * %eax and %ds are saved on entry and restored before iret; %fs is
 * saved/restored around the per-cpu access when COUNT_XINVLTLB_HITS
 * is defined.
 */
.text
SUPERALIGN_TEXT
.globl Xinvlpg
Xinvlpg:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
/* Instrumentation: count this hit in xhits_pg[cpuid]. */
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
/*
 * NOTE(review): %ds was already loaded with KDSEL above, so the ss
 * override on the following incl looks redundant (Xinvlrng omits it);
 * confirm before removing.
 */
ss
incl xhits_pg(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %eax /* address published by the initiating CPU */
invlpg (%eax) /* invalidate single page */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
/* Tell the initiator we are done: atomic smp_tlb_wait++ */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
 * Page range TLB shootdown.
 *
 * IPI handler installed at XINVLRNG_OFFSET.  Invalidates the TLB entry
 * of every page in [smp_tlb_addr1, smp_tlb_addr2), stepping PAGE_SIZE
 * at a time, signals End Of Interrupt to the local APIC, then atomically
 * increments smp_tlb_wait, which the initiating CPU polls to learn that
 * this CPU is done.
 *
 * The loop is bottom-tested, so the page at smp_tlb_addr1 is always
 * invalidated even if the range is empty.
 *
 * %eax, %edx and %ds are saved on entry and restored before iret; %fs
 * is saved/restored around the per-cpu access when COUNT_XINVLTLB_HITS
 * is defined.
 */
	.text
	SUPERALIGN_TEXT
	.globl	Xinvlrng
Xinvlrng:
	pushl	%eax
	pushl	%edx
	pushl	%ds
	movl	$KDSEL, %eax		/* Kernel data selector */
	mov	%ax, %ds

#ifdef COUNT_XINVLTLB_HITS
	/* Instrumentation: count this hit in xhits_rng[cpuid]. */
	pushl	%fs
	movl	$KPSEL, %eax		/* Private space selector */
	mov	%ax, %fs
	movl	PCPU(CPUID), %eax
	popl	%fs
	incl	xhits_rng(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */

	movl	smp_tlb_addr1, %edx	/* %edx = current page address */
	movl	smp_tlb_addr2, %eax	/* %eax = end of range (exclusive) */
1:	invlpg	(%edx)			/* invalidate single page */
	addl	$PAGE_SIZE, %edx
	cmpl	%eax, %edx		/* AT&T order: flags from %edx - %eax */
	jb	1b			/* loop while current < end (unsigned) */

	movl	$0, lapic+LA_EOI	/* End Of Interrupt to APIC */

	/* Tell the initiator we are done: atomic smp_tlb_wait++ */
	lock
	incl	smp_tlb_wait

	popl	%ds
	popl	%edx
	popl	%eax
	iret
@ -443,12 +521,6 @@ Xrendezvous:
.data .data
#ifdef COUNT_XINVLTLB_HITS
.globl _xhits
_xhits:
.space (NCPU * 4), 0
#endif /* COUNT_XINVLTLB_HITS */
.globl apic_pin_trigger .globl apic_pin_trigger
apic_pin_trigger: apic_pin_trigger:
.long 0 .long 0

View File

@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
} }
else { else {
vector = TPR_SLOW_INTS + intr; vector = TPR_SLOW_INTS + intr;
#ifdef APIC_INTR_REORDER
#ifdef APIC_INTR_HIGHPRI_CLOCK
/* XXX: Hack (kludge?) for more accurate clock. */
if (intr == apic_8254_intr || intr == 8) {
vector = TPR_FAST_INTS + intr;
}
#endif
#endif
setidt(vector, slowintr[intr], setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
} }

View File

@ -88,6 +88,7 @@
/* IDT vector base for regular (aka. slow) and fast interrupts */ /* IDT vector base for regular (aka. slow) and fast interrupts */
#define TPR_SLOW_INTS 0x20 #define TPR_SLOW_INTS 0x20
#define TPR_FAST_INTS 0x60 #define TPR_FAST_INTS 0x60
/* XXX note that the AST interrupt is at 0x50 */
/* blocking values for local APIC Task Priority Register */ /* blocking values for local APIC Task Priority Register */
#define TPR_BLOCK_HWI 0x4f /* hardware INTs */ #define TPR_BLOCK_HWI 0x4f /* hardware INTs */
@ -104,20 +105,23 @@
#endif /** TEST_TEST1 */ #endif /** TEST_TEST1 */
/* TLB shootdowns */ /* TLB shootdowns */
#define XINVLTLB_OFFSET (ICU_OFFSET + 112) #define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */
#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */
#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */
/* inter-cpu clock handling */ /* inter-cpu clock handling */
#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113) #define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */
#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114) #define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */
/* inter-CPU rendezvous */ /* inter-CPU rendezvous */
#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115) #define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */
/* IPI to generate an additional software trap at the target CPU */ /* IPI to generate an additional software trap at the target CPU */
#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* XXX in the middle of the interrupt range, overlapping IRQ48 */
#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */
/* IPI to signal CPUs to stop and wait for another CPU to restart them */ /* IPI to signal CPUs to stop and wait for another CPU to restart them */
#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) #define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */
/* /*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff: * Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@ -181,7 +185,9 @@ inthand_t
IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31); IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
inthand_t inthand_t
Xinvltlb, /* TLB shootdowns */ Xinvltlb, /* TLB shootdowns - global */
Xinvlpg, /* TLB shootdowns - 1 page */
Xinvlrng, /* TLB shootdowns - page range */
Xhardclock, /* Forward hardclock() */ Xhardclock, /* Forward hardclock() */
Xstatclock, /* Forward statclock() */ Xstatclock, /* Forward statclock() */
Xcpuast, /* Additional software trap on other cpu */ Xcpuast, /* Additional software trap on other cpu */

View File

@ -499,14 +499,6 @@ icu_setup(int intr, driver_intr_t *handler, void *arg, int flags)
} }
else { else {
vector = TPR_SLOW_INTS + intr; vector = TPR_SLOW_INTS + intr;
#ifdef APIC_INTR_REORDER
#ifdef APIC_INTR_HIGHPRI_CLOCK
/* XXX: Hack (kludge?) for more accurate clock. */
if (intr == apic_8254_intr || intr == 8) {
vector = TPR_FAST_INTS + intr;
}
#endif
#endif
setidt(vector, slowintr[intr], setidt(vector, slowintr[intr],
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
} }

View File

@ -222,6 +222,9 @@ static struct witness_order_list_entry order_lists[] = {
{ "icu", &lock_class_mtx_spin }, { "icu", &lock_class_mtx_spin },
#ifdef SMP #ifdef SMP
{ "smp rendezvous", &lock_class_mtx_spin }, { "smp rendezvous", &lock_class_mtx_spin },
#ifdef __i386__
{ "tlb", &lock_class_mtx_spin },
#endif
#endif #endif
{ "clk", &lock_class_mtx_spin }, { "clk", &lock_class_mtx_spin },
{ NULL, NULL }, { NULL, NULL },