Revive the backed-out pmap-related changes from Feb 2002.  The highlights are:

- It actually works this time, honest!
- Fine-grained TLB shootdowns for SMP on i386.  IPIs are very expensive,
  so try to optimize things where possible.
- Introduce ranged shootdowns that can be done as a single IPI.
- PG_G support for i386
- Specific-cpu targeted shootdowns.  For example, there is no sense in
  globally purging the TLB cache when we are stealing a page from an
  unshared process that is only active on the local cpu.  Use pm_active
  to track this (see the sketch after this list).
- Add some instrumentation for the tlb shootdown code.
- Rip out SMP code from <machine/cpufunc.h>
- Try to fix some very bogus PG_G and PG_PS interactions that were bad
  enough to cause vm86 bios calls to break.  vm86 depended on our existing
  bugs, and this was the cause of the VESA panics last time.
- Fix the silly one-line error that caused the 'panic: bad pte' last time.
- Fix a couple of other silly one-line errors that should have caused more
  pain than they did.
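
The specific-cpu targeting above boils down to something like the following
sketch (illustrative only; the committed version is the SMP flavour of
pmap_invalidate_page() in the pmap.c hunks later in this diff):

        /*
         * Only bother the CPUs that actually have this pmap active;
         * pm_active is a bitmask of those CPUs.
         */
        static void
        sketch_invalidate_page(pmap_t pmap, vm_offset_t va)
        {
                u_int others;

                if (pmap->pm_active & PCPU_GET(cpumask))
                        invlpg(va);             /* flush the local TLB entry */
                others = pmap->pm_active & PCPU_GET(other_cpus);
                if (others != 0)
                        smp_masked_invlpg(others, va); /* IPI only those CPUs */
        }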

Some more work is needed:
- pmap_{zero,copy}_page[_idle].  These can be done without IPIs if we
  have a hook in cpu_switch.
- The IPI handlers need some cleanup.  I have a bogus %ds load that can
  be avoided.
- APTD handling is rather bogus and appears to be a large source of
  global TLB IPI shootdowns for no really good reason.

I see speedups of between 1.5% and ~4% on buildworlds in a while 1 loop.
I expect to see a bigger difference when there is significant pageout
activity or the system otherwise has memory shortages.

I have backed out a few optimizations that I had been using over the last
few days in order to be a little more conservative.  I'll revisit these
again over the next few days as the dust settles.

New option:  DISABLE_PG_G - In case I missed something.
Commit bb717e117a by peter, 2002-07-12 07:56:11 +00:00 (parent d8d0133632)
34 changed files with 2444 additions and 1031 deletions

View File

@ -260,30 +260,107 @@ Xspuriousint:
iret
/*
* Handle TLB shootdowns.
* Global address space TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvltlb
Xinvltlb:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
ss
incl xhits(,%eax,4)
incl xhits_gbl(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3
ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Single page TLB shootdown
*/
.text
SUPERALIGN_TEXT
.globl Xinvlpg
Xinvlpg:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
incl xhits_pg(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %eax
invlpg (%eax) /* invalidate single page */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Page range TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvlrng
Xinvlrng:
pushl %eax
pushl %edx
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
incl xhits_rng(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %edx
movl smp_tlb_addr2, %eax
1: invlpg (%edx) /* invalidate single page */
addl $PAGE_SIZE, %edx
cmpl %eax, %edx
jb 1b
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %edx
popl %eax
iret
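
In C terms, the handlers above do roughly the following (an illustrative
sketch only; the real handlers are the assembler above, which also saves and
restores registers, loads a kernel %ds, and maintains the optional
COUNT_XINVLTLB_HITS counters):

        /* Xinvlrng, approximately: */
        vm_offset_t va = smp_tlb_addr1;

        do {
                invlpg(va);                     /* one page at a time */
                va += PAGE_SIZE;
        } while (va < smp_tlb_addr2);
        lapic.eoi = 0;                          /* EOI to the local APIC */
        atomic_add_int(&smp_tlb_wait, 1);       /* handshake with the sender */

Xinvltlb does the same with a cr3 reload instead of the page loop, and Xinvlpg
with a single invlpg(smp_tlb_addr1).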

View File

@ -323,7 +323,8 @@ bios16(struct bios_args *args, char *fmt, ...)
va_list ap;
int flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
u_int i, arg_start, arg_end;
u_int *pte, *ptd;
pt_entry_t *pte;
pd_entry_t *ptd;
arg_start = 0xffffffff;
arg_end = 0;
@ -382,19 +383,19 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
args->seg.code32.limit = 0xffff;
ptd = (u_int *)rcr3();
ptd = (pd_entry_t *)rcr3();
if (ptd == (u_int *)IdlePTD) {
/*
* no page table, so create one and install it.
*/
pte = (u_int *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = (u_int *)((u_int)ptd + KERNBASE);
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = (pd_entry_t *)((u_int)ptd + KERNBASE);
*ptd = vtophys(pte) | PG_RW | PG_V;
} else {
/*
* this is a user-level page table
*/
pte = (u_int *)&PTmap;
pte = PTmap;
}
/*
* install pointer to page 0. we don't need to flush the tlb,
@ -451,7 +452,7 @@ bios16(struct bios_args *args, char *fmt, ...)
i = bios16_call(&args->r, stack_top);
if (pte == (u_int *)&PTmap) {
if (pte == PTmap) {
*pte = 0; /* remove entry */
} else {
*ptd = 0; /* remove page table */
@ -461,7 +462,7 @@ bios16(struct bios_args *args, char *fmt, ...)
/*
* XXX only needs to be invlpg(0) but that doesn't work on the 386
*/
invltlb();
pmap_invalidate_all(kernel_pmap);
return (i);
}

View File

@ -276,7 +276,7 @@ db_write_bytes(addr, size, data)
}
}
invltlb();
pmap_invalidate_all(kernel_pmap);
}
dst = (char *)addr;
@ -292,7 +292,7 @@ db_write_bytes(addr, size, data)
if (ptep1)
*ptep1 = oldmap1;
invltlb();
pmap_invalidate_all(kernel_pmap);
}
}

View File

@ -127,6 +127,7 @@ HIDENAME(tmpstk):
.globl bootinfo
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
.globl KERNend
KERNend: .long 0 /* phys addr end of kernel (just after bss) */
physfree: .long 0 /* phys addr of next free page */
@ -381,12 +382,6 @@ begin:
movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
movl %cr4, %eax
orl $CR4_PGE, %eax
movl %eax, %cr4
1:
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@ -809,14 +804,7 @@ no_kernend:
jne map_read_write
#endif
xorl %edx,%edx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 2f
orl $PG_G,%edx
#endif
2: movl $R(etext),%ecx
movl $R(etext),%ecx
addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
@ -827,13 +815,7 @@ no_kernend:
andl $~PAGE_MASK, %eax
map_read_write:
movl $PG_RW,%edx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
orl $PG_G,%edx
#endif
1: movl R(KERNend),%ecx
movl R(KERNend),%ecx
subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)

View File

@ -288,6 +288,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/*
* Local data and functions.
*/
@ -336,6 +344,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}
/*
@ -605,6 +616,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2190,48 +2205,237 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
#if defined(APIC_IO) && defined(COUNT_XINVLTLB_HITS)
u_int xhits[MAXCPU];
SYSCTL_OPAQUE(_debug, OID_AUTO, xhits, CTLFLAG_RW, &xhits, sizeof(xhits),
"IU", "");
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/*
* Flush the TLB on all other CPU's
*
* XXX: Needs to handshake and wait for completion before proceeding.
*/
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
u_int ncpu;
register_t eflags;
ncpu = mp_ncpus - 1; /* does not shootdown self */
if (ncpu < 1)
return; /* no other cpus */
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
/*
* This is about as magic as it gets. fortune(1) has got similar code
* for reversing bits in a word. Who thinks up this stuff??
*
* Yes, it does appear to be consistently faster than:
* while (i = ffs(m)) {
* m >>= i;
* bits++;
* }
* and
* while (lsb = (m & -m)) { // This is magic too
* m &= ~lsb; // or: m ^= lsb
* bits++;
* }
* Both of these latter forms do some very strange things on gcc-3.1 with
* -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
* There is probably an SSE or MMX popcnt instruction.
*
* I wonder if this should be in libkern?
*
* XXX Stop the presses! Another one:
* static __inline u_int32_t
* popcnt1(u_int32_t v)
* {
* v -= ((v >> 1) & 0x55555555);
* v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
* v = (v + (v >> 4)) & 0x0F0F0F0F;
* return (v * 0x01010101) >> 24;
* }
* The downside is that it has a multiply. With a pentium3 with
* -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
* an imull, and in that case it is faster. In most other cases
* it appears slightly slower.
*/
static __inline u_int32_t
popcnt(u_int32_t m)
{
m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
return m;
}
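/*
 * Illustrative worked example of the SWAR popcount above (not part of the
 * commit): popcnt(0xb), i.e. binary 1011:
 *	step 1 (2-bit sums):  1011 -> 0110   (pairs 11 and 10 count 2 and 1)
 *	step 2 (4-bit sums):  0110 -> 0011   (= 3)
 * The remaining steps only add zero-filled halves, so the result stays 3,
 * matching the three set bits in 1011.
 */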
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
int ncpu, othercpus;
register_t eflags;
othercpus = mp_ncpus - 1;
if (mask == (u_int)-1) {
ncpu = othercpus;
if (ncpu < 1)
return;
} else {
/* XXX there should be a pcpu self mask */
mask &= ~(1 << PCPU_GET(cpuid));
if (mask == 0)
return;
ncpu = popcnt(mask);
if (ncpu > othercpus) {
/* XXX this should be a panic offence */
printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
ncpu, othercpus);
ncpu = othercpus;
}
/* XXX should be a panic, implied by mask == 0 above */
if (ncpu < 1)
return;
}
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
if (mask == (u_int)-1)
ipi_all_but_self(vector);
else
ipi_selected(mask, vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void
smp_invltlb(void)
{
#if defined(APIC_IO)
if (smp_started)
ipi_all_but_self(IPI_INVLTLB);
if (smp_started) {
smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */
}
void
invlpg(u_int addr)
smp_invlpg(vm_offset_t addr)
{
__asm __volatile("invlpg (%0)"::"r"(addr):"memory");
/* send a message to the other CPUs */
smp_invltlb();
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
}
void
invltlb(void)
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{
u_long temp;
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3() is
* inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
void
smp_masked_invltlb(u_int mask)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */
smp_invltlb();
void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
@ -2251,7 +2455,7 @@ ap_init(void)
/* spin */ ;
/* BSP may have changed PTD while we were waiting */
cpu_invltlb();
invltlb();
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
lidt(&r_idt);
@ -2290,6 +2494,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@ -2325,7 +2532,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
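
The sender functions above and the Xinvltlb/Xinvlpg/Xinvlrng handlers in
apic_vector.s form a simple handshake, roughly as follows (an illustrative
sketch, not the literal code):

        /* sender: smp_tlb_shootdown() / smp_targeted_tlb_shootdown() */
        mtx_lock_spin(&smp_tlb_mtx);
        smp_tlb_addr1 = addr1;
        smp_tlb_addr2 = addr2;
        atomic_store_rel_int(&smp_tlb_wait, 0);
        ipi_all_but_self(vector);       /* or ipi_selected(mask, vector) */
        while (smp_tlb_wait < ncpu)
                ia32_pause();
        mtx_unlock_spin(&smp_tlb_mtx);

        /* each receiver, in C terms */
        /* ... invalidate whatever smp_tlb_addr1/addr2 describe ... */
        atomic_add_int(&smp_tlb_wait, 1);  /* the "lock; incl smp_tlb_wait" */

The PSL_I check above presumably exists to avoid a deadlock: a CPU that
entered this path with interrupts disabled could never service a shootdown
IPI aimed at it by another CPU, so two CPUs shooting down concurrently would
spin forever on smp_tlb_mtx / smp_tlb_wait.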

View File

@ -68,7 +68,6 @@
* and to when physical maps must be made correct.
*/
#include "opt_disable_pse.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
#include "opt_kstack_pages.h"
@ -85,6 +84,9 @@
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@ -97,6 +99,7 @@
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@ -162,6 +165,7 @@ static vm_object_t kptobj;
static int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;
/*
* Data for the pv entry allocation mechanism
@ -257,10 +261,10 @@ static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
vm_offset_t newaddr = addr;
#ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) {
if (cpu_feature & CPUID_PSE)
newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
}
#endif
return newaddr;
}
@ -362,10 +366,9 @@ pmap_bootstrap(firstaddr, loadaddr)
PTD[i] = 0;
pgeflag = 0;
#if !defined(SMP) /* XXX - see also mp_machdep.c */
if (cpu_feature & CPUID_PGE) {
#ifndef DISABLE_PG_G
if (cpu_feature & CPUID_PGE)
pgeflag = PG_G;
}
#endif
/*
@ -378,7 +381,7 @@ pmap_bootstrap(firstaddr, loadaddr)
*/
pdir4mb = 0;
#if !defined(DISABLE_PSE)
#ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) {
pd_entry_t ptditmp;
/*
@ -389,29 +392,16 @@ pmap_bootstrap(firstaddr, loadaddr)
ptditmp &= ~(NBPDR - 1);
ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
pdir4mb = ptditmp;
#if !defined(SMP)
/*
* Enable the PSE mode.
*/
load_cr4(rcr4() | CR4_PSE);
/*
* We can do the mapping here for the single processor
* case. We simply ignore the old page table page from
* now on.
*/
/*
* For SMP, we still need 4K pages to bootstrap APs,
* PSE will be enabled as soon as all APs are up.
*/
PTD[KPTDI] = (pd_entry_t) ptditmp;
kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
invltlb();
#endif
}
#endif
#ifndef SMP
/*
* Turn on PGE/PSE. SMP does this later on since the
* 4K page tables are required for AP boot (for now).
* XXX fixme.
*/
pmap_set_opt();
#endif
#ifdef SMP
if (cpu_apic_address == 0)
panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
@ -420,26 +410,55 @@ pmap_bootstrap(firstaddr, loadaddr)
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
(cpu_apic_address & PG_FRAME));
#endif
invltlb();
}
#ifdef SMP
/*
* Set 4mb pdir for mp startup
* Enable 4MB page mode for MP startup. Turn on PG_G support.
* BSP will run this after all the AP's have started up.
*/
void
pmap_set_opt(void)
{
pt_entry_t *pte;
vm_offset_t va, endva;
if (pgeflag && (cpu_feature & CPUID_PGE)) {
load_cr4(rcr4() | CR4_PGE);
invltlb(); /* Insurance */
}
#ifndef DISABLE_PSE
if (pseflag && (cpu_feature & CPUID_PSE)) {
load_cr4(rcr4() | CR4_PSE);
if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */
invltlb(); /* Insurance */
}
#endif
if (PCPU_GET(cpuid) == 0) {
#ifndef DISABLE_PSE
if (pdir4mb) {
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
cpu_invltlb();
invltlb(); /* Insurance */
}
#endif
if (pgeflag) {
/* Turn on PG_G for text, data, bss pages. */
va = (vm_offset_t)btext;
endva = KERNBASE + KERNend;
while (va < endva) {
pte = vtopte(va);
if (*pte)
*pte |= pgeflag;
va += PAGE_SIZE;
}
invltlb(); /* Insurance */
}
/*
* We do not need to broadcast the invltlb here, because
* each AP does it the moment it is released from the boot
* lock. See ap_init().
*/
}
}
#endif
void *
pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
@ -553,43 +572,151 @@ pmap_track_modified(vm_offset_t va)
return 0;
}
static PMAP_INLINE void
invltlb_1pg(vm_offset_t va)
{
#ifdef I386_CPU
invltlb();
#else
invlpg(va);
#endif
}
static __inline void
/*
* i386 only has "invalidate everything" and no SMP to worry about.
*/
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
#if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask))
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
if (pmap->pm_active)
invltlb_1pg(va);
#endif
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
static __inline void
PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
#if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask))
cpu_invltlb();
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
if (pmap->pm_active)
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
#endif
}
#else /* !I386_CPU */
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
*/
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
u_int cpumask;
u_int other_cpus;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
invlpg(va);
if (pmap->pm_active & other_cpus)
smp_masked_invlpg(pmap->pm_active & other_cpus, va);
}
critical_exit();
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
u_int cpumask;
u_int other_cpus;
vm_offset_t addr;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
if (pmap->pm_active & other_cpus)
smp_masked_invlpg_range(pmap->pm_active & other_cpus,
sva, eva);
}
critical_exit();
}
void
pmap_invalidate_all(pmap_t pmap)
{
u_int cpumask;
u_int other_cpus;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
invltlb();
if (pmap->pm_active & other_cpus)
smp_masked_invltlb(pmap->pm_active & other_cpus);
}
critical_exit();
}
#else /* !SMP */
/*
* Normal, non-SMP, 486+ invalidation functions.
* We inline these within pmap.c for speed.
*/
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
if (pmap == kernel_pmap || pmap->pm_active)
invlpg(va);
}
PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t addr;
if (pmap == kernel_pmap || pmap->pm_active)
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
}
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
#endif /* !SMP */
#endif /* !I386_CPU */
/*
* Return an address which is the base of the Virtual mapping of
@ -613,12 +740,7 @@ get_ptbase(pmap)
/* otherwise, we are alternate address space */
if (frame != (APTDpde & PG_FRAME)) {
APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb();
#endif
}
return APTmap;
}
@ -647,7 +769,7 @@ pmap_pte_quick(pmap, va)
newpf = pde & PG_FRAME;
if (((*PMAP1) & PG_FRAME) != newpf) {
*PMAP1 = newpf | PG_RW | PG_V;
invltlb_1pg((vm_offset_t) PADDR1);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1);
}
return PADDR1 + (index & (NPTEPG - 1));
}
@ -692,34 +814,29 @@ pmap_extract(pmap, va)
***************************************************/
/*
* add a wired page to the kva
* note that in order for the mapping to take effect -- you
* should do a invltlb after doing the pmap_kenter...
* Add a wired page to the kva.
* Note: not SMP coherent.
*/
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
pt_entry_t *pte;
pt_entry_t npte, opte;
npte = pa | PG_RW | PG_V | pgeflag;
pte = vtopte(va);
opte = *pte;
*pte = npte;
invltlb_1pg(va);
*pte = pa | PG_RW | PG_V | pgeflag;
}
/*
* remove a page from the kernel pagetables
* Remove a page from the kernel pagetables.
* Note: not SMP coherent.
*/
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
register pt_entry_t *pte;
pt_entry_t *pte;
pte = vtopte(va);
*pte = 0;
invltlb_1pg(va);
}
/*
@ -737,13 +854,15 @@ pmap_kremove(vm_offset_t va)
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
vm_offset_t sva = *virt;
vm_offset_t va = sva;
vm_offset_t va, sva;
va = sva = *virt;
while (start < end) {
pmap_kenter(va, start);
va += PAGE_SIZE;
start += PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
*virt = va;
return (sva);
}
@ -756,64 +875,45 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
* page modification or references recorded.
* Note that old mappings are simply written
* over. The page *must* be wired.
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
vm_offset_t end_va;
vm_offset_t va;
end_va = va + count * PAGE_SIZE;
while (va < end_va) {
pt_entry_t *pte;
pte = vtopte(va);
*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va = sva;
while (count-- > 0) {
pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
va += PAGE_SIZE;
m++;
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, sva, va);
}
/*
* this routine jerks page mappings from the
* This routine tears out page mappings from the
* kernel -- it is meant only for temporary mappings.
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
pmap_qremove(vm_offset_t va, int count)
pmap_qremove(vm_offset_t sva, int count)
{
vm_offset_t end_va;
vm_offset_t va;
end_va = va + count*PAGE_SIZE;
while (va < end_va) {
pt_entry_t *pte;
pte = vtopte(va);
*pte = 0;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va = sva;
while (count-- > 0) {
pmap_kremove(va);
va += PAGE_SIZE;
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, sva, va);
}
static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
vm_page_t m;
retry:
m = vm_page_lookup(object, pindex);
if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
@ -829,14 +929,11 @@ pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
void
pmap_new_thread(struct thread *td)
{
#ifdef I386_CPU
int updateneeded = 0;
#endif
int i;
vm_page_t ma[KSTACK_PAGES];
vm_object_t ksobj;
vm_page_t m;
vm_offset_t ks;
pt_entry_t *ptek, oldpte;
/*
* allocate object for the kstack
@ -844,39 +941,21 @@ pmap_new_thread(struct thread *td)
ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES);
td->td_kstack_obj = ksobj;
#ifdef KSTACK_GUARD
/* get a kernel virtual address for the kstack for this thread */
#ifdef KSTACK_GUARD
ks = kmem_alloc_nofault(kernel_map, (KSTACK_PAGES + 1) * PAGE_SIZE);
if (ks == 0)
panic("pmap_new_thread: kstack allocation failed");
/*
* Set the first page to be the unmapped guard page.
*/
ptek = vtopte(ks);
oldpte = *ptek;
*ptek = 0;
if (oldpte) {
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks);
#endif
}
/*
* move to the next page, which is where the real stack starts.
*/
if (*vtopte(ks) != 0)
pmap_qremove(ks, 1);
ks += PAGE_SIZE;
td->td_kstack = ks;
ptek++;
#else
/* get a kernel virtual address for the kstack for this thread */
ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE);
if (ks == 0)
panic("pmap_new_thread: kstack allocation failed");
td->td_kstack = ks;
ptek = vtopte(ks);
#endif
/*
* For the length of the stack, link in a real page of ram for each
@ -887,6 +966,7 @@ pmap_new_thread(struct thread *td)
* Get a kernel stack page
*/
m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
ma[i] = m;
/*
* Wire the page
@ -894,28 +974,12 @@ pmap_new_thread(struct thread *td)
m->wire_count++;
cnt.v_wire_count++;
/*
* Enter the page into the kernel address space.
*/
oldpte = ptek[i];
ptek[i] = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
if (oldpte) {
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks + (i * PAGE_SIZE));
#endif
}
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
#ifdef I386_CPU
if (updateneeded)
invltlb();
#endif
pmap_qenter(ks, ma, KSTACK_PAGES);
}
/*
@ -930,26 +994,18 @@ pmap_dispose_thread(td)
vm_object_t ksobj;
vm_offset_t ks;
vm_page_t m;
pt_entry_t *ptek;
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
ptek = vtopte(ks);
pmap_qremove(ks, KSTACK_PAGES);
for (i = 0; i < KSTACK_PAGES; i++) {
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
vm_page_busy(m);
ptek[i] = 0;
#ifndef I386_CPU
invlpg(ks + (i * PAGE_SIZE));
#endif
vm_page_unwire(m, 0);
vm_page_free(m);
}
#ifdef I386_CPU
invltlb();
#endif
/*
* Free the space that this stack was mapped to in the kernel
* address map.
@ -976,13 +1032,13 @@ pmap_swapout_thread(td)
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
pmap_qremove(ks, KSTACK_PAGES);
for (i = 0; i < KSTACK_PAGES; i++) {
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_swapout_thread: kstack already missing?");
vm_page_dirty(m);
vm_page_unwire(m, 0);
pmap_kremove(ks + i * PAGE_SIZE);
}
}
@ -994,6 +1050,7 @@ pmap_swapin_thread(td)
struct thread *td;
{
int i, rv;
vm_page_t ma[KSTACK_PAGES];
vm_object_t ksobj;
vm_offset_t ks;
vm_page_t m;
@ -1002,7 +1059,6 @@ pmap_swapin_thread(td)
ks = td->td_kstack;
for (i = 0; i < KSTACK_PAGES; i++) {
m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m));
if (m->valid != VM_PAGE_BITS_ALL) {
rv = vm_pager_get_pages(ksobj, &m, 1, 0);
if (rv != VM_PAGER_OK)
@ -1010,10 +1066,12 @@ pmap_swapin_thread(td)
m = vm_page_lookup(ksobj, i);
m->valid = VM_PAGE_BITS_ALL;
}
ma[i] = m;
vm_page_wire(m);
vm_page_wakeup(m);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
}
pmap_qenter(ks, ma, KSTACK_PAGES);
}
/***************************************************
@ -1108,7 +1166,8 @@ pmap_pinit0(pmap)
{
pmap->pm_pdir =
(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t)IdlePTD);
invlpg((vm_offset_t)pmap->pm_pdir);
pmap->pm_ptphint = NULL;
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
@ -1153,7 +1212,7 @@ pmap_pinit(pmap)
vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
ptdpg->valid = VM_PAGE_BITS_ALL;
pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
pmap_qenter((vm_offset_t) pmap->pm_pdir, &ptdpg, 1);
if ((ptdpg->flags & PG_ZERO) == 0)
bzero(pmap->pm_pdir, PAGE_SIZE);
@ -1616,7 +1675,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
* PG_G.
*/
if (oldpte & PG_G)
invlpg(va);
pmap_invalidate_page(kernel_pmap, va);
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
m = PHYS_TO_VM_PAGE(oldpte);
@ -2028,13 +2087,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
if ((origpte & PG_RW) == 0) {
*pte |= PG_RW;
#ifdef SMP
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
pmap_invalidate_page(pmap, va);
}
return;
}
@ -2102,13 +2155,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((origpte & ~(PG_M|PG_A)) != newpte) {
*pte = newpte | PG_A;
/*if (origpte)*/ {
#ifdef SMP
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
pmap_invalidate_page(pmap, va);
}
}
}
@ -2222,7 +2269,11 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
void *
pmap_kenter_temporary(vm_offset_t pa, int i)
{
pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
vm_offset_t va;
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
pmap_kenter(va, pa);
invlpg(va);
return ((void *)crashdumpmap);
}
@ -2527,7 +2578,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t pdnxt;
pd_entry_t src_frame, dst_frame;
vm_page_t m;
pd_entry_t saved_pde;
if (dst_addr != src_addr)
return;
@ -2537,17 +2587,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
return;
dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
if (dst_frame != (APTDpde & PG_FRAME)) {
APTDpde = dst_frame | PG_RW | PG_V;
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb();
#endif
}
saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V);
for(addr = src_addr; addr < end_addr; addr = pdnxt) {
for (addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr;
@ -2588,6 +2628,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pdnxt > end_addr)
pdnxt = end_addr;
/*
* Have to recheck this before every avtopte() call below
* in case we have blocked and something else used APTDpde.
*/
if (dst_frame != (APTDpde & PG_FRAME)) {
APTDpde = dst_frame | PG_RW | PG_V;
invltlb();
}
src_pte = vtopte(addr);
dst_pte = avtopte(addr);
while (addr < pdnxt) {
@ -2603,16 +2651,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block.
*/
dstmpte = pmap_allocpte(dst_pmap, addr);
if ((APTDpde & PG_FRAME) !=
(saved_pde & PG_FRAME)) {
APTDpde = saved_pde;
printf ("IT HAPPENNED!");
#if defined(SMP)
cpu_invltlb();
#else
invltlb();
#endif
}
if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/*
* Clear the modified and
@ -2644,14 +2682,13 @@ printf ("IT HAPPENNED!");
void
pmap_zero_page(vm_page_t m)
{
vm_offset_t phys = VM_PAGE_TO_PHYS(m);
vm_offset_t phys;
phys = VM_PAGE_TO_PHYS(m);
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
@ -2670,14 +2707,13 @@ pmap_zero_page(vm_page_t m)
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
vm_offset_t phys = VM_PAGE_TO_PHYS(m);
vm_offset_t phys;
phys = VM_PAGE_TO_PHYS(m);
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
@ -2696,20 +2732,13 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
void
pmap_zero_page_idle(vm_page_t m)
{
vm_offset_t phys = VM_PAGE_TO_PHYS(m);
vm_offset_t phys;
phys = VM_PAGE_TO_PHYS(m);
if (*CMAP3)
panic("pmap_zero_page: CMAP3 busy");
*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
#ifdef SMP
mtx_lock(&Giant); /* IPI sender not MPSAFE */
#endif
invltlb_1pg((vm_offset_t)CADDR3);
#ifdef SMP
mtx_unlock(&Giant);
#endif
invlpg((vm_offset_t)CADDR3); /* SMP: local cpu only */
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR3);
@ -2733,18 +2762,15 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
panic("pmap_copy_page: CMAP1 busy");
if (*CMAP2)
panic("pmap_copy_page: CMAP2 busy");
*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
#ifdef I386_CPU
invltlb();
#else
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
#endif
/*
* XXX we "know" that CADDR2 immediately follows CADDR1 and use
* that to save an IPI on SMP systems.
*/
pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
(vm_offset_t)CADDR2 + PAGE_SIZE);
bcopy(CADDR1, CADDR2, PAGE_SIZE);
*CMAP1 = 0;
*CMAP2 = 0;
}
@ -3176,18 +3202,11 @@ pmap_mapdev(pa, size)
for (tmpva = va; size > 0; ) {
pte = vtopte(tmpva);
*pte = pa | PG_RW | PG_V | pgeflag;
#ifdef SMP
cpu_invlpg((void *)tmpva);
#else
invltlb_1pg(tmpva);
#endif
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
pa += PAGE_SIZE;
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, va, tmpva);
return ((void *)(va + offset));
}
@ -3205,15 +3224,8 @@ pmap_unmapdev(va, size)
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
pte = vtopte(tmpva);
*pte = 0;
#ifdef SMP
cpu_invlpg((void *)tmpva);
#else
invltlb_1pg(tmpva);
#endif
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, va, tmpva);
kmem_free(kernel_map, base, size);
}
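
Since pmap_kenter()/pmap_kremove() are no longer SMP coherent on their own,
callers are expected to batch the mappings and then issue one ranged
invalidation, exactly as pmap_map()/pmap_qenter()/pmap_qremove() above now do.
A hypothetical caller would look like this (sketch only):

        static void
        sketch_map_wired_pages(vm_offset_t va, vm_offset_t pa, int npages)
        {
                vm_offset_t sva = va;
                int i;

                for (i = 0; i < npages; i++) {
                        pmap_kenter(va, pa);    /* no TLB work here any more */
                        va += PAGE_SIZE;
                        pa += PAGE_SIZE;
                }
                /* one (possibly ranged-IPI) shootdown instead of one per page */
                pmap_invalidate_range(kernel_pmap, sva, va);
        }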

View File

@ -1596,42 +1596,6 @@ ENTRY(ssdtosd)
popl %ebx
ret
/* load_cr0(cr0) */
ENTRY(load_cr0)
movl 4(%esp),%eax
movl %eax,%cr0
ret
/* rcr0() */
ENTRY(rcr0)
movl %cr0,%eax
ret
/* rcr3() */
ENTRY(rcr3)
movl %cr3,%eax
ret
/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#ifdef SWTCH_OPTIM_STATS
incl tlb_flush_count
#endif
movl 4(%esp),%eax
movl %eax,%cr3
ret
/* rcr4() */
ENTRY(rcr4)
movl %cr4,%eax
ret
/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
movl 4(%esp),%eax
movl %eax,%cr4
ret
/* void reset_dbregs() */
ENTRY(reset_dbregs)
movl $0,%eax

View File

@ -237,62 +237,6 @@ invd(void)
__asm __volatile("invd");
}
#if defined(SMP) && defined(_KERNEL)
/*
* When using APIC IPI's, invlpg() is not simply the invlpg instruction
* (this is a bug) and the inlining cost is prohibitive since the call
* executes into the IPI transmission system.
*/
void invlpg(u_int addr);
void invltlb(void);
static __inline void
cpu_invlpg(void *addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
cpu_invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
#else /* !(SMP && _KERNEL) */
static __inline void
invlpg(u_int addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#ifdef SWTCH_OPTIM_STATS
++tlb_flush_count;
#endif
}
#endif /* SMP && _KERNEL */
static __inline u_short
inw(u_int port)
{
@ -363,15 +307,6 @@ ia32_pause(void)
__asm __volatile("pause");
}
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline u_int
read_eflags(void)
{
@ -426,6 +361,86 @@ wrmsr(u_int msr, u_int64_t newval)
__asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
}
static __inline void
load_cr0(u_int data)
{
__asm __volatile("movl %0,%%cr0" : : "r" (data));
}
static __inline u_int
rcr0(void)
{
u_int data;
__asm __volatile("movl %%cr0,%0" : "=r" (data));
return (data);
}
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline void
load_cr3(u_int data)
{
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
static __inline u_int
rcr3(void)
{
u_int data;
__asm __volatile("movl %%cr3,%0" : "=r" (data));
return (data);
}
static __inline void
load_cr4(u_int data)
{
__asm __volatile("movl %0,%%cr4" : : "r" (data));
}
static __inline u_int
rcr4(void)
{
u_int data;
__asm __volatile("movl %%cr4,%0" : "=r" (data));
return (data);
}
/*
* Global TLB flush (except for those pages marked PG_G)
*/
static __inline void
invltlb(void)
{
load_cr3(rcr3());
}
/*
* TLB flush for an individual page (even if it has PG_G).
* Only works on 486+ CPUs (i386 does not have PG_G).
*/
static __inline void
invlpg(u_int addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline u_int
rfs(void)
{
@ -587,6 +602,8 @@ intr_restore(register_t eflags)
int breakpoint(void);
u_int bsfl(u_int mask);
u_int bsrl(u_int mask);
void cpu_invlpg(u_int addr);
void cpu_invlpg_range(u_int start, u_int end);
void disable_intr(void);
void do_cpuid(u_int ax, u_int *p);
void enable_intr(void);
@ -597,8 +614,14 @@ void insl(u_int port, void *addr, size_t cnt);
void insw(u_int port, void *addr, size_t cnt);
void invd(void);
void invlpg(u_int addr);
void invlpg_range(u_int start, u_int end);
void invltlb(void);
u_short inw(u_int port);
void load_cr0(u_int cr0);
void load_cr3(u_int cr3);
void load_cr4(u_int cr4);
void load_fs(u_int sel);
void load_gs(u_int sel);
void outb(u_int port, u_char data);
void outl(u_int port, u_int data);
void outsb(u_int port, void *addr, size_t cnt);
@ -606,7 +629,12 @@ void outsl(u_int port, void *addr, size_t cnt);
void outsw(u_int port, void *addr, size_t cnt);
void outw(u_int port, u_short data);
void ia32_pause(void);
u_int rcr0(void);
u_int rcr2(void);
u_int rcr3(void);
u_int rcr4(void);
u_int rfs(void);
u_int rgs(void);
u_int64_t rdmsr(u_int msr);
u_int64_t rdpmc(u_int pmc);
u_int64_t rdtsc(void);
@ -614,10 +642,6 @@ u_int read_eflags(void);
void wbinvd(void);
void write_eflags(u_int ef);
void wrmsr(u_int msr, u_int64_t newval);
u_int rfs(void);
u_int rgs(void);
void load_fs(u_int sel);
void load_gs(u_int sel);
u_int rdr0(void);
void load_dr0(u_int dr0);
u_int rdr1(void);
@ -639,13 +663,7 @@ void intr_restore(register_t ef);
#endif /* __GNUC__ */
void load_cr0(u_int cr0);
void load_cr3(u_int cr3);
void load_cr4(u_int cr4);
void ltr(u_short sel);
u_int rcr0(void);
u_int rcr3(void);
u_int rcr4(void);
void reset_dbregs(void);
__END_DECLS
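
The practical consequence of the PG_G comments above is that a cr3 reload is
not enough to evict a global mapping; such PTEs must be shot down with
invlpg().  A hypothetical illustration (not from the diff):

        static void
        sketch_kremove_global(vm_offset_t va)
        {
                pt_entry_t *pte = vtopte(va);   /* vtopte() from pmap.h */

                *pte = 0;
                /* invltlb() alone would leave a stale PG_G entry in the TLB */
                invlpg(va);
        }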

View File

@ -151,7 +151,7 @@ extern pt_entry_t PTmap[], APTmap[];
extern pd_entry_t PTD[], APTD[];
extern pd_entry_t PTDpde, APTDpde;
extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */
extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
#endif
#ifdef _KERNEL
@ -253,14 +253,15 @@ extern char *ptvmmap; /* poor name! */
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
void pmap_bootstrap( vm_offset_t, vm_offset_t);
void pmap_bootstrap(vm_offset_t, vm_offset_t);
void *pmap_mapdev(vm_offset_t, vm_size_t);
void pmap_unmapdev(vm_offset_t, vm_size_t);
pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2;
vm_page_t pmap_use_pt(pmap_t, vm_offset_t);
#ifdef SMP
void pmap_set_opt(void);
#endif
void pmap_invalidate_page(pmap_t, vm_offset_t);
void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
#endif /* _KERNEL */

View File

@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */
* Interprocessor interrupts for SMP.
*/
#define IPI_INVLTLB XINVLTLB_OFFSET
#define IPI_INVLPG XINVLPG_OFFSET
#define IPI_INVLRNG XINVLRNG_OFFSET
#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET
#define IPI_AST XCPUAST_OFFSET
#define IPI_STOP XCPUSTOP_OFFSET
@ -107,7 +109,6 @@ void assign_apic_irq(int apic, int intpin, int irq);
void revoke_apic_irq(int irq);
void bsp_apic_configure(void);
void init_secondary(void);
void smp_invltlb(void);
void forward_statclock(void);
void forwarded_statclock(struct trapframe frame);
void forward_hardclock(void);
@ -119,6 +120,13 @@ void ipi_self(u_int ipi);
#ifdef APIC_INTR_REORDER
void set_lapic_isrloc(int, int);
#endif /* APIC_INTR_REORDER */
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(u_int mask, vm_offset_t addr);
void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
void smp_masked_invlpg_range(u_int mask, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
void smp_masked_invltlb(u_int mask);
/* global data in mpapic.c */
extern volatile lapic_t lapic;

View File

@ -88,6 +88,7 @@
/* IDT vector base for regular (aka. slow) and fast interrupts */
#define TPR_SLOW_INTS 0x20
#define TPR_FAST_INTS 0x60
/* XXX note that the AST interrupt is at 0x50 */
/* blocking values for local APIC Task Priority Register */
#define TPR_BLOCK_HWI 0x4f /* hardware INTs */
@ -104,20 +105,23 @@
#endif /** TEST_TEST1 */
/* TLB shootdowns */
#define XINVLTLB_OFFSET (ICU_OFFSET + 112)
#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */
#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */
#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */
/* inter-cpu clock handling */
#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113)
#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114)
#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */
#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */
/* inter-CPU rendezvous */
#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115)
#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */
/* IPI to generate an additional software trap at the target CPU */
#define XCPUAST_OFFSET (ICU_OFFSET + 48)
/* XXX in the middle of the interrupt range, overlapping IRQ48 */
#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */
/* IPI to signal CPUs to stop and wait for another CPU to restart them */
#define XCPUSTOP_OFFSET (ICU_OFFSET + 128)
#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */
/*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@ -194,7 +198,9 @@ inthand_t
IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
inthand_t
Xinvltlb, /* TLB shootdowns */
Xinvltlb, /* TLB shootdowns - global */
Xinvlpg, /* TLB shootdowns - 1 page */
Xinvlrng, /* TLB shootdowns - page range */
Xhardclock, /* Forward hardclock() */
Xstatclock, /* Forward statclock() */
Xcpuast, /* Additional software trap on other cpu */

View File

@ -1,10 +1,11 @@
# $FreeBSD$
# Options specific to the i386 platform kernels
DISABLE_PSE
MATH_EMULATE opt_math_emulate.h
GPL_MATH_EMULATE opt_math_emulate.h
DISABLE_PSE opt_pmap.h
PMAP_SHPGPERPROC opt_pmap.h
DISABLE_PG_G opt_pmap.h
PPC_PROBE_CHIPSET opt_ppc.h
PPC_DEBUG opt_ppc.h
SHOW_BUSYBUFS

View File

@ -1,10 +1,11 @@
# $FreeBSD$
# Options specific to the pc98 platform kernels
DISABLE_PSE
MATH_EMULATE opt_math_emulate.h
GPL_MATH_EMULATE opt_math_emulate.h
DISABLE_PSE opt_pmap.h
PMAP_SHPGPERPROC opt_pmap.h
DISABLE_PG_G opt_pmap.h
PPC_PROBE_CHIPSET opt_ppc.h
PPC_DEBUG opt_ppc.h
SHOW_BUSYBUFS

View File

@ -260,30 +260,107 @@ Xspuriousint:
iret
/*
* Handle TLB shootdowns.
* Global address space TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvltlb
Xinvltlb:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
ss
incl xhits(,%eax,4)
incl xhits_gbl(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3
ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Single page TLB shootdown
*/
.text
SUPERALIGN_TEXT
.globl Xinvlpg
Xinvlpg:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
incl xhits_pg(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %eax
invlpg (%eax) /* invalidate single page */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Page range TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvlrng
Xinvlrng:
pushl %eax
pushl %edx
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
incl xhits_rng(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %edx
movl smp_tlb_addr2, %eax
1: invlpg (%edx) /* invalidate single page */
addl $PAGE_SIZE, %edx
cmpl %eax, %edx /* loop while %edx is still below smp_tlb_addr2 */
jb 1b
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %edx
popl %eax
iret
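For reference, here is what the Xinvlrng handler above does, expressed as rough C. This is an illustration only, not code from this commit: the register saves, %ds/%fs setup and COUNT_XINVLTLB_HITS bookkeeping are omitted, the function name is made up, and the local APIC "eoi" field name is assumed.

static void
invlrng_handler_sketch(void)
{
        vm_offset_t addr;

        for (addr = smp_tlb_addr1; addr < smp_tlb_addr2; addr += PAGE_SIZE)
                invlpg(addr);                   /* one INVLPG per page in the range */
        lapic.eoi = 0;                          /* the "movl $0, lapic+LA_EOI" */
        atomic_add_int(&smp_tlb_wait, 1);       /* the "lock; incl smp_tlb_wait" */
}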

View File

@ -323,7 +323,8 @@ bios16(struct bios_args *args, char *fmt, ...)
va_list ap;
int flags = BIOSCODE_FLAG | BIOSDATA_FLAG;
u_int i, arg_start, arg_end;
u_int *pte, *ptd;
pt_entry_t *pte;
pd_entry_t *ptd;
arg_start = 0xffffffff;
arg_end = 0;
@ -382,19 +383,19 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.code32.base = (u_int)&bios16_jmp & PG_FRAME;
args->seg.code32.limit = 0xffff;
ptd = (u_int *)rcr3();
ptd = (pd_entry_t *)rcr3();
if (ptd == (u_int *)IdlePTD) {
/*
* no page table, so create one and install it.
*/
pte = (u_int *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = (u_int *)((u_int)ptd + KERNBASE);
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
ptd = (pd_entry_t *)((u_int)ptd + KERNBASE);
*ptd = vtophys(pte) | PG_RW | PG_V;
} else {
/*
* this is a user-level page table
*/
pte = (u_int *)&PTmap;
pte = PTmap;
}
/*
* install pointer to page 0. we don't need to flush the tlb,
@ -451,7 +452,7 @@ bios16(struct bios_args *args, char *fmt, ...)
i = bios16_call(&args->r, stack_top);
if (pte == (u_int *)&PTmap) {
if (pte == PTmap) {
*pte = 0; /* remove entry */
} else {
*ptd = 0; /* remove page table */
@ -461,7 +462,7 @@ bios16(struct bios_args *args, char *fmt, ...)
/*
* XXX only needs to be invlpg(0) but that doesn't work on the 386
*/
invltlb();
pmap_invalidate_all(kernel_pmap);
return (i);
}

View File

@ -276,7 +276,7 @@ db_write_bytes(addr, size, data)
}
}
invltlb();
pmap_invalidate_all(kernel_pmap);
}
dst = (char *)addr;
@ -292,7 +292,7 @@ db_write_bytes(addr, size, data)
if (ptep1)
*ptep1 = oldmap1;
invltlb();
pmap_invalidate_all(kernel_pmap);
}
}

View File

@ -127,6 +127,7 @@ HIDENAME(tmpstk):
.globl bootinfo
bootinfo: .space BOOTINFO_SIZE /* bootinfo that we can handle */
.globl KERNend
KERNend: .long 0 /* phys addr end of kernel (just after bss) */
physfree: .long 0 /* phys addr of next free page */
@ -381,12 +382,6 @@ begin:
movl IdlePTD,%esi
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
movl %cr4, %eax
orl $CR4_PGE, %eax
movl %eax, %cr4
1:
pushl physfree /* value of first for init386(first) */
call init386 /* wire 386 chip for unix operation */
@ -809,14 +804,7 @@ no_kernend:
jne map_read_write
#endif
xorl %edx,%edx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 2f
orl $PG_G,%edx
#endif
2: movl $R(etext),%ecx
movl $R(etext),%ecx
addl $PAGE_MASK,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)
@ -827,13 +815,7 @@ no_kernend:
andl $~PAGE_MASK, %eax
map_read_write:
movl $PG_RW,%edx
#if !defined(SMP)
testl $CPUID_PGE, R(cpu_feature)
jz 1f
orl $PG_G,%edx
#endif
1: movl R(KERNend),%ecx
movl R(KERNend),%ecx
subl %eax,%ecx
shrl $PAGE_SHIFT,%ecx
fillkptphys(%edx)

View File

@ -288,6 +288,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/*
* Local data and functions.
*/
@ -336,6 +344,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}
/*
@ -605,6 +616,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2190,48 +2205,237 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
#if defined(APIC_IO) && defined(COUNT_XINVLTLB_HITS)
u_int xhits[MAXCPU];
SYSCTL_OPAQUE(_debug, OID_AUTO, xhits, CTLFLAG_RW, &xhits, sizeof(xhits),
"IU", "");
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/*
* Flush the TLB on all other CPUs
*
* XXX: Needs to handshake and wait for completion before proceeding.
*/
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
u_int ncpu;
register_t eflags;
ncpu = mp_ncpus - 1; /* does not shootdown self */
if (ncpu < 1)
return; /* no other cpus */
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
/*
* This is about as magic as it gets. fortune(1) has got similar code
* for reversing bits in a word. Who thinks up this stuff??
*
* Yes, it does appear to be consistently faster than:
* while (i = ffs(m)) {
* m >>= i;
* bits++;
* }
* and
* while (lsb = (m & -m)) { // This is magic too
* m &= ~lsb; // or: m ^= lsb
* bits++;
* }
* Both of these latter forms do some very strange things on gcc-3.1 with
* -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
* There is probably an SSE or MMX popcnt instruction.
*
* I wonder if this should be in libkern?
*
* XXX Stop the presses! Another one:
* static __inline u_int32_t
* popcnt1(u_int32_t v)
* {
* v -= ((v >> 1) & 0x55555555);
* v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
* v = (v + (v >> 4)) & 0x0F0F0F0F;
* return (v * 0x01010101) >> 24;
* }
* The downside is that it has a multiply. With a pentium3 with
* -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
* an imull, and in that case it is faster. In most other cases
* it appears slightly slower.
*/
static __inline u_int32_t
popcnt(u_int32_t m)
{
m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
return m;
}
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
int ncpu, othercpus;
register_t eflags;
othercpus = mp_ncpus - 1;
if (mask == (u_int)-1) {
ncpu = othercpus;
if (ncpu < 1)
return;
} else {
/* XXX there should be a pcpu self mask */
mask &= ~(1 << PCPU_GET(cpuid));
if (mask == 0)
return;
ncpu = popcnt(mask);
if (ncpu > othercpus) {
/* XXX this should be a panic offence */
printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
ncpu, othercpus);
ncpu = othercpus;
}
/* XXX should be a panic, implied by mask == 0 above */
if (ncpu < 1)
return;
}
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
if (mask == (u_int)-1)
ipi_all_but_self(vector);
else
ipi_selected(mask, vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void
smp_invltlb(void)
{
#if defined(APIC_IO)
if (smp_started)
ipi_all_but_self(IPI_INVLTLB);
if (smp_started) {
smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */
}
void
invlpg(u_int addr)
smp_invlpg(vm_offset_t addr)
{
__asm __volatile("invlpg (%0)"::"r"(addr):"memory");
/* send a message to the other CPUs */
smp_invltlb();
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
}
void
invltlb(void)
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{
u_long temp;
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3() is
* inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
void
smp_masked_invltlb(u_int mask)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */
smp_invltlb();
void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
@ -2251,7 +2455,7 @@ ap_init(void)
/* spin */ ;
/* BSP may have changed PTD while we were waiting */
cpu_invltlb();
invltlb();
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
lidt(&r_idt);
@ -2290,6 +2494,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@ -2325,7 +2532,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}

View File

@ -101,9 +101,6 @@ apic_initialize(void)
#endif /** TEST_TEST1 */
lapic.svr = temp;
if (bootverbose)
apic_dump("apic_initialize()");
}

View File

@ -288,6 +288,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/*
* Local data and functions.
*/
@ -336,6 +344,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}
/*
@ -605,6 +616,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2190,48 +2205,237 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
#if defined(APIC_IO) && defined(COUNT_XINVLTLB_HITS)
u_int xhits[MAXCPU];
SYSCTL_OPAQUE(_debug, OID_AUTO, xhits, CTLFLAG_RW, &xhits, sizeof(xhits),
"IU", "");
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/*
* Flush the TLB on all other CPUs
*
* XXX: Needs to handshake and wait for completion before proceeding.
*/
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
u_int ncpu;
register_t eflags;
ncpu = mp_ncpus - 1; /* does not shootdown self */
if (ncpu < 1)
return; /* no other cpus */
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
/*
* This is about as magic as it gets. fortune(1) has got similar code
* for reversing bits in a word. Who thinks up this stuff??
*
* Yes, it does appear to be consistently faster than:
* while (i = ffs(m)) {
* m >>= i;
* bits++;
* }
* and
* while (lsb = (m & -m)) { // This is magic too
* m &= ~lsb; // or: m ^= lsb
* bits++;
* }
* Both of these latter forms do some very strange things on gcc-3.1 with
* -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
* There is probably an SSE or MMX popcnt instruction.
*
* I wonder if this should be in libkern?
*
* XXX Stop the presses! Another one:
* static __inline u_int32_t
* popcnt1(u_int32_t v)
* {
* v -= ((v >> 1) & 0x55555555);
* v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
* v = (v + (v >> 4)) & 0x0F0F0F0F;
* return (v * 0x01010101) >> 24;
* }
* The downside is that it has a multiply. With a pentium3 with
* -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
* an imull, and in that case it is faster. In most other cases
* it appears slightly slower.
*/
static __inline u_int32_t
popcnt(u_int32_t m)
{
m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
return m;
}
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
int ncpu, othercpus;
register_t eflags;
othercpus = mp_ncpus - 1;
if (mask == (u_int)-1) {
ncpu = othercpus;
if (ncpu < 1)
return;
} else {
/* XXX there should be a pcpu self mask */
mask &= ~(1 << PCPU_GET(cpuid));
if (mask == 0)
return;
ncpu = popcnt(mask);
if (ncpu > othercpus) {
/* XXX this should be a panic offence */
printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
ncpu, othercpus);
ncpu = othercpus;
}
/* XXX should be a panic, implied by mask == 0 above */
if (ncpu < 1)
return;
}
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
if (mask == (u_int)-1)
ipi_all_but_self(vector);
else
ipi_selected(mask, vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
void
smp_invltlb(void)
{
#if defined(APIC_IO)
if (smp_started)
ipi_all_but_self(IPI_INVLTLB);
if (smp_started) {
smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */
}
void
invlpg(u_int addr)
smp_invlpg(vm_offset_t addr)
{
__asm __volatile("invlpg (%0)"::"r"(addr):"memory");
/* send a message to the other CPUs */
smp_invltlb();
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
}
void
invltlb(void)
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{
u_long temp;
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3() is
* inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
void
smp_masked_invltlb(u_int mask)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */
smp_invltlb();
void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
@ -2251,7 +2455,7 @@ ap_init(void)
/* spin */ ;
/* BSP may have changed PTD while we were waiting */
cpu_invltlb();
invltlb();
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
lidt(&r_idt);
@ -2290,6 +2494,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@ -2325,7 +2532,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}

View File

@ -68,7 +68,6 @@
* and to when physical maps must be made correct.
*/
#include "opt_disable_pse.h"
#include "opt_pmap.h"
#include "opt_msgbuf.h"
#include "opt_kstack_pages.h"
@ -85,6 +84,9 @@
#include <sys/user.h>
#include <sys/vmmeter.h>
#include <sys/sysctl.h>
#ifdef SMP
#include <sys/smp.h>
#endif
#include <vm/vm.h>
#include <vm/vm_param.h>
@ -97,6 +99,7 @@
#include <vm/vm_pager.h>
#include <vm/uma.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@ -162,6 +165,7 @@ static vm_object_t kptobj;
static int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;
/*
* Data for the pv entry allocation mechanism
@ -257,10 +261,10 @@ static vm_offset_t
pmap_kmem_choose(vm_offset_t addr)
{
vm_offset_t newaddr = addr;
#ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) {
if (cpu_feature & CPUID_PSE)
newaddr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
}
#endif
return newaddr;
}
@ -362,10 +366,9 @@ pmap_bootstrap(firstaddr, loadaddr)
PTD[i] = 0;
pgeflag = 0;
#if !defined(SMP) /* XXX - see also mp_machdep.c */
if (cpu_feature & CPUID_PGE) {
#ifndef DISABLE_PG_G
if (cpu_feature & CPUID_PGE)
pgeflag = PG_G;
}
#endif
/*
@ -378,7 +381,7 @@ pmap_bootstrap(firstaddr, loadaddr)
*/
pdir4mb = 0;
#if !defined(DISABLE_PSE)
#ifndef DISABLE_PSE
if (cpu_feature & CPUID_PSE) {
pd_entry_t ptditmp;
/*
@ -389,29 +392,16 @@ pmap_bootstrap(firstaddr, loadaddr)
ptditmp &= ~(NBPDR - 1);
ptditmp |= PG_V | PG_RW | PG_PS | PG_U | pgeflag;
pdir4mb = ptditmp;
#if !defined(SMP)
/*
* Enable the PSE mode.
*/
load_cr4(rcr4() | CR4_PSE);
/*
* We can do the mapping here for the single processor
* case. We simply ignore the old page table page from
* now on.
*/
/*
* For SMP, we still need 4K pages to bootstrap APs,
* PSE will be enabled as soon as all APs are up.
*/
PTD[KPTDI] = (pd_entry_t) ptditmp;
kernel_pmap->pm_pdir[KPTDI] = (pd_entry_t) ptditmp;
invltlb();
#endif
}
#endif
#ifndef SMP
/*
* Turn on PGE/PSE. SMP does this later on since the
* 4K page tables are required for AP boot (for now).
* XXX fixme.
*/
pmap_set_opt();
#endif
#ifdef SMP
if (cpu_apic_address == 0)
panic("pmap_bootstrap: no local apic! (non-SMP hardware?)");
@ -420,26 +410,55 @@ pmap_bootstrap(firstaddr, loadaddr)
SMPpt[NPTEPG - 1] = (pt_entry_t)(PG_V | PG_RW | PG_N | pgeflag |
(cpu_apic_address & PG_FRAME));
#endif
invltlb();
}
#ifdef SMP
/*
* Set 4mb pdir for mp startup
* Enable 4MB page mode for MP startup. Turn on PG_G support.
* BSP will run this after all the AP's have started up.
*/
void
pmap_set_opt(void)
{
pt_entry_t *pte;
vm_offset_t va, endva;
if (pgeflag && (cpu_feature & CPUID_PGE)) {
load_cr4(rcr4() | CR4_PGE);
invltlb(); /* Insurance */
}
#ifndef DISABLE_PSE
if (pseflag && (cpu_feature & CPUID_PSE)) {
load_cr4(rcr4() | CR4_PSE);
if (pdir4mb && PCPU_GET(cpuid) == 0) { /* only on BSP */
invltlb(); /* Insurance */
}
#endif
if (PCPU_GET(cpuid) == 0) {
#ifndef DISABLE_PSE
if (pdir4mb) {
kernel_pmap->pm_pdir[KPTDI] = PTD[KPTDI] = pdir4mb;
cpu_invltlb();
invltlb(); /* Insurance */
}
#endif
if (pgeflag) {
/* Turn on PG_G for text, data, bss pages. */
va = (vm_offset_t)btext;
endva = KERNBASE + KERNend;
while (va < endva) {
pte = vtopte(va);
if (*pte)
*pte |= pgeflag;
va += PAGE_SIZE;
}
invltlb(); /* Insurance */
}
/*
* We do not need to broadcast the invltlb here, because
* each AP does it the moment it is released from the boot
* lock. See ap_init().
*/
}
}
#endif
void *
pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
@ -553,43 +572,151 @@ pmap_track_modified(vm_offset_t va)
return 0;
}
static PMAP_INLINE void
invltlb_1pg(vm_offset_t va)
{
#ifdef I386_CPU
invltlb();
#else
invlpg(va);
#endif
}
static __inline void
/*
* i386 only has "invalidate everything" and no SMP to worry about.
*/
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
#if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask))
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
if (pmap->pm_active)
invltlb_1pg(va);
#endif
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
static __inline void
PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
#if defined(SMP)
if (pmap->pm_active & PCPU_GET(cpumask))
cpu_invltlb();
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
if (pmap->pm_active)
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
#endif
}
#else /* !I386_CPU */
#ifdef SMP
/*
* For SMP, these functions have to use the IPI mechanism for coherence.
*/
void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
u_int cpumask;
u_int other_cpus;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invlpg(va);
smp_invlpg(va);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
invlpg(va);
if (pmap->pm_active & other_cpus)
smp_masked_invlpg(pmap->pm_active & other_cpus, va);
}
critical_exit();
}
void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
u_int cpumask;
u_int other_cpus;
vm_offset_t addr;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
smp_invlpg_range(sva, eva);
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
if (pmap->pm_active & other_cpus)
smp_masked_invlpg_range(pmap->pm_active & other_cpus,
sva, eva);
}
critical_exit();
}
void
pmap_invalidate_all(pmap_t pmap)
{
u_int cpumask;
u_int other_cpus;
critical_enter();
/*
* We need to disable interrupt preemption but MUST NOT have
* interrupts disabled here.
* XXX we may need to hold schedlock to get a coherent pm_active
*/
if (pmap->pm_active == -1 || pmap->pm_active == all_cpus) {
invltlb();
smp_invltlb();
} else {
cpumask = PCPU_GET(cpumask);
other_cpus = PCPU_GET(other_cpus);
if (pmap->pm_active & cpumask)
invltlb();
if (pmap->pm_active & other_cpus)
smp_masked_invltlb(pmap->pm_active & other_cpus);
}
critical_exit();
}
#else /* !SMP */
/*
* Normal, non-SMP, 486+ invalidation functions.
* We inline these within pmap.c for speed.
*/
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
if (pmap == kernel_pmap || pmap->pm_active)
invlpg(va);
}
PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t addr;
if (pmap == kernel_pmap || pmap->pm_active)
for (addr = sva; addr < eva; addr += PAGE_SIZE)
invlpg(addr);
}
PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{
if (pmap == kernel_pmap || pmap->pm_active)
invltlb();
}
#endif /* !SMP */
#endif /* !I386_CPU */
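To show how the new interface is meant to be used, here is a hypothetical helper (not part of this diff; the name is invented, everything it calls is from the code above): change the PTE first, then let pmap_invalidate_page() decide which CPUs, if any, need an IPI.

static void
kremap_page(vm_offset_t va, vm_offset_t pa)
{
        pt_entry_t *pte;

        pte = vtopte(va);
        *pte = pa | PG_RW | PG_V | pgeflag;     /* rewrite the mapping */
        pmap_invalidate_page(kernel_pmap, va);  /* local invlpg, IPI only if needed */
}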
/*
* Return an address which is the base of the Virtual mapping of
@ -613,12 +740,7 @@ get_ptbase(pmap)
/* otherwise, we are alternate address space */
if (frame != (APTDpde & PG_FRAME)) {
APTDpde = (pd_entry_t) (frame | PG_RW | PG_V);
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb();
#endif
}
return APTmap;
}
@ -647,7 +769,7 @@ pmap_pte_quick(pmap, va)
newpf = pde & PG_FRAME;
if (((*PMAP1) & PG_FRAME) != newpf) {
*PMAP1 = newpf | PG_RW | PG_V;
invltlb_1pg((vm_offset_t) PADDR1);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1);
}
return PADDR1 + (index & (NPTEPG - 1));
}
@ -692,34 +814,29 @@ pmap_extract(pmap, va)
***************************************************/
/*
* add a wired page to the kva
* note that in order for the mapping to take effect -- you
* should do a invltlb after doing the pmap_kenter...
* Add a wired page to the kva.
* Note: not SMP coherent.
*/
PMAP_INLINE void
pmap_kenter(vm_offset_t va, vm_offset_t pa)
{
pt_entry_t *pte;
pt_entry_t npte, opte;
npte = pa | PG_RW | PG_V | pgeflag;
pte = vtopte(va);
opte = *pte;
*pte = npte;
invltlb_1pg(va);
*pte = pa | PG_RW | PG_V | pgeflag;
}
/*
* remove a page from the kernel pagetables
* Remove a page from the kernel pagetables.
* Note: not SMP coherent.
*/
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
register pt_entry_t *pte;
pt_entry_t *pte;
pte = vtopte(va);
*pte = 0;
invltlb_1pg(va);
}
/*
@ -737,13 +854,15 @@ pmap_kremove(vm_offset_t va)
vm_offset_t
pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
{
vm_offset_t sva = *virt;
vm_offset_t va = sva;
vm_offset_t va, sva;
va = sva = *virt;
while (start < end) {
pmap_kenter(va, start);
va += PAGE_SIZE;
start += PAGE_SIZE;
}
pmap_invalidate_range(kernel_pmap, sva, va);
*virt = va;
return (sva);
}
@ -756,64 +875,45 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot)
* page modification or references recorded.
* Note that old mappings are simply written
* over. The page *must* be wired.
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
pmap_qenter(vm_offset_t va, vm_page_t *m, int count)
pmap_qenter(vm_offset_t sva, vm_page_t *m, int count)
{
vm_offset_t end_va;
vm_offset_t va;
end_va = va + count * PAGE_SIZE;
while (va < end_va) {
pt_entry_t *pte;
pte = vtopte(va);
*pte = VM_PAGE_TO_PHYS(*m) | PG_RW | PG_V | pgeflag;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va = sva;
while (count-- > 0) {
pmap_kenter(va, VM_PAGE_TO_PHYS(*m));
va += PAGE_SIZE;
m++;
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, sva, va);
}
/*
* this routine jerks page mappings from the
* This routine tears out page mappings from the
* kernel -- it is meant only for temporary mappings.
* Note: SMP coherent. Uses a ranged shootdown IPI.
*/
void
pmap_qremove(vm_offset_t va, int count)
pmap_qremove(vm_offset_t sva, int count)
{
vm_offset_t end_va;
vm_offset_t va;
end_va = va + count*PAGE_SIZE;
while (va < end_va) {
pt_entry_t *pte;
pte = vtopte(va);
*pte = 0;
#ifdef SMP
cpu_invlpg((void *)va);
#else
invltlb_1pg(va);
#endif
va = sva;
while (count-- > 0) {
pmap_kremove(va);
va += PAGE_SIZE;
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, sva, va);
}
static vm_page_t
pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
{
vm_page_t m;
retry:
m = vm_page_lookup(object, pindex);
if (m && vm_page_sleep_busy(m, FALSE, "pplookp"))
@ -829,14 +929,11 @@ pmap_page_lookup(vm_object_t object, vm_pindex_t pindex)
void
pmap_new_thread(struct thread *td)
{
#ifdef I386_CPU
int updateneeded = 0;
#endif
int i;
vm_page_t ma[KSTACK_PAGES];
vm_object_t ksobj;
vm_page_t m;
vm_offset_t ks;
pt_entry_t *ptek, oldpte;
/*
* allocate object for the kstack
@ -844,39 +941,21 @@ pmap_new_thread(struct thread *td)
ksobj = vm_object_allocate(OBJT_DEFAULT, KSTACK_PAGES);
td->td_kstack_obj = ksobj;
#ifdef KSTACK_GUARD
/* get a kernel virtual address for the kstack for this thread */
#ifdef KSTACK_GUARD
ks = kmem_alloc_nofault(kernel_map, (KSTACK_PAGES + 1) * PAGE_SIZE);
if (ks == 0)
panic("pmap_new_thread: kstack allocation failed");
/*
* Set the first page to be the unmapped guard page.
*/
ptek = vtopte(ks);
oldpte = *ptek;
*ptek = 0;
if (oldpte) {
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks);
#endif
}
/*
* move to the next page, which is where the real stack starts.
*/
if (*vtopte(ks) != 0)
pmap_qremove(ks, 1);
ks += PAGE_SIZE;
td->td_kstack = ks;
ptek++;
#else
/* get a kernel virtual address for the kstack for this thread */
ks = kmem_alloc_nofault(kernel_map, KSTACK_PAGES * PAGE_SIZE);
if (ks == 0)
panic("pmap_new_thread: kstack allocation failed");
td->td_kstack = ks;
ptek = vtopte(ks);
#endif
/*
* For the length of the stack, link in a real page of ram for each
@ -887,6 +966,7 @@ pmap_new_thread(struct thread *td)
* Get a kernel stack page
*/
m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
ma[i] = m;
/*
* Wire the page
@ -894,28 +974,12 @@ pmap_new_thread(struct thread *td)
m->wire_count++;
cnt.v_wire_count++;
/*
* Enter the page into the kernel address space.
*/
oldpte = ptek[i];
ptek[i] = VM_PAGE_TO_PHYS(m) | PG_RW | PG_V | pgeflag;
if (oldpte) {
#ifdef I386_CPU
updateneeded = 1;
#else
invlpg(ks + (i * PAGE_SIZE));
#endif
}
vm_page_wakeup(m);
vm_page_flag_clear(m, PG_ZERO);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
m->valid = VM_PAGE_BITS_ALL;
}
#ifdef I386_CPU
if (updateneeded)
invltlb();
#endif
pmap_qenter(ks, ma, KSTACK_PAGES);
}
/*
@ -930,26 +994,18 @@ pmap_dispose_thread(td)
vm_object_t ksobj;
vm_offset_t ks;
vm_page_t m;
pt_entry_t *ptek;
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
ptek = vtopte(ks);
pmap_qremove(ks, KSTACK_PAGES);
for (i = 0; i < KSTACK_PAGES; i++) {
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_dispose_thread: kstack already missing?");
vm_page_busy(m);
ptek[i] = 0;
#ifndef I386_CPU
invlpg(ks + (i * PAGE_SIZE));
#endif
vm_page_unwire(m, 0);
vm_page_free(m);
}
#ifdef I386_CPU
invltlb();
#endif
/*
* Free the space that this stack was mapped to in the kernel
* address map.
@ -976,13 +1032,13 @@ pmap_swapout_thread(td)
ksobj = td->td_kstack_obj;
ks = td->td_kstack;
pmap_qremove(ks, KSTACK_PAGES);
for (i = 0; i < KSTACK_PAGES; i++) {
m = vm_page_lookup(ksobj, i);
if (m == NULL)
panic("pmap_swapout_thread: kstack already missing?");
vm_page_dirty(m);
vm_page_unwire(m, 0);
pmap_kremove(ks + i * PAGE_SIZE);
}
}
@ -994,6 +1050,7 @@ pmap_swapin_thread(td)
struct thread *td;
{
int i, rv;
vm_page_t ma[KSTACK_PAGES];
vm_object_t ksobj;
vm_offset_t ks;
vm_page_t m;
@ -1002,7 +1059,6 @@ pmap_swapin_thread(td)
ks = td->td_kstack;
for (i = 0; i < KSTACK_PAGES; i++) {
m = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
pmap_kenter(ks + i * PAGE_SIZE, VM_PAGE_TO_PHYS(m));
if (m->valid != VM_PAGE_BITS_ALL) {
rv = vm_pager_get_pages(ksobj, &m, 1, 0);
if (rv != VM_PAGER_OK)
@ -1010,10 +1066,12 @@ pmap_swapin_thread(td)
m = vm_page_lookup(ksobj, i);
m->valid = VM_PAGE_BITS_ALL;
}
ma[i] = m;
vm_page_wire(m);
vm_page_wakeup(m);
vm_page_flag_set(m, PG_MAPPED | PG_WRITEABLE);
}
pmap_qenter(ks, ma, KSTACK_PAGES);
}
/***************************************************
@ -1108,7 +1166,8 @@ pmap_pinit0(pmap)
{
pmap->pm_pdir =
(pd_entry_t *)kmem_alloc_pageable(kernel_map, PAGE_SIZE);
pmap_kenter((vm_offset_t) pmap->pm_pdir, (vm_offset_t) IdlePTD);
pmap_kenter((vm_offset_t)pmap->pm_pdir, (vm_offset_t)IdlePTD);
invlpg((vm_offset_t)pmap->pm_pdir);
pmap->pm_ptphint = NULL;
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
@ -1153,7 +1212,7 @@ pmap_pinit(pmap)
vm_page_flag_clear(ptdpg, PG_MAPPED | PG_BUSY); /* not usually mapped*/
ptdpg->valid = VM_PAGE_BITS_ALL;
pmap_kenter((vm_offset_t) pmap->pm_pdir, VM_PAGE_TO_PHYS(ptdpg));
pmap_qenter((vm_offset_t) pmap->pm_pdir, &ptdpg, 1);
if ((ptdpg->flags & PG_ZERO) == 0)
bzero(pmap->pm_pdir, PAGE_SIZE);
@ -1616,7 +1675,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
* PG_G.
*/
if (oldpte & PG_G)
invlpg(va);
pmap_invalidate_page(kernel_pmap, va);
pmap->pm_stats.resident_count -= 1;
if (oldpte & PG_MANAGED) {
m = PHYS_TO_VM_PAGE(oldpte);
@ -2028,13 +2087,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((prot & VM_PROT_WRITE) && (origpte & PG_V)) {
if ((origpte & PG_RW) == 0) {
*pte |= PG_RW;
#ifdef SMP
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
pmap_invalidate_page(pmap, va);
}
return;
}
@ -2102,13 +2155,7 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
if ((origpte & ~(PG_M|PG_A)) != newpte) {
*pte = newpte | PG_A;
/*if (origpte)*/ {
#ifdef SMP
cpu_invlpg((void *)va);
if (pmap->pm_active & PCPU_GET(other_cpus))
smp_invltlb();
#else
invltlb_1pg(va);
#endif
pmap_invalidate_page(pmap, va);
}
}
}
@ -2222,7 +2269,11 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
void *
pmap_kenter_temporary(vm_offset_t pa, int i)
{
pmap_kenter((vm_offset_t)crashdumpmap + (i * PAGE_SIZE), pa);
vm_offset_t va;
va = (vm_offset_t)crashdumpmap + (i * PAGE_SIZE);
pmap_kenter(va, pa);
invlpg(va);
return ((void *)crashdumpmap);
}
@ -2527,7 +2578,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
vm_offset_t pdnxt;
pd_entry_t src_frame, dst_frame;
vm_page_t m;
pd_entry_t saved_pde;
if (dst_addr != src_addr)
return;
@ -2537,17 +2587,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
return;
dst_frame = dst_pmap->pm_pdir[PTDPTDI] & PG_FRAME;
if (dst_frame != (APTDpde & PG_FRAME)) {
APTDpde = dst_frame | PG_RW | PG_V;
#if defined(SMP)
/* The page directory is not shared between CPUs */
cpu_invltlb();
#else
invltlb();
#endif
}
saved_pde = APTDpde & (PG_FRAME | PG_RW | PG_V);
for(addr = src_addr; addr < end_addr; addr = pdnxt) {
for (addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
pd_entry_t srcptepaddr;
@ -2588,6 +2628,14 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
if (pdnxt > end_addr)
pdnxt = end_addr;
/*
* Have to recheck this before every avtopte() call below
* in case we have blocked and something else used APTDpde.
*/
if (dst_frame != (APTDpde & PG_FRAME)) {
APTDpde = dst_frame | PG_RW | PG_V;
invltlb();
}
src_pte = vtopte(addr);
dst_pte = avtopte(addr);
while (addr < pdnxt) {
@ -2603,16 +2651,6 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block.
*/
dstmpte = pmap_allocpte(dst_pmap, addr);
if ((APTDpde & PG_FRAME) !=
(saved_pde & PG_FRAME)) {
APTDpde = saved_pde;
printf ("IT HAPPENNED!");
#if defined(SMP)
cpu_invltlb();
#else
invltlb();
#endif
}
if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/*
* Clear the modified and
@ -2644,14 +2682,13 @@ printf ("IT HAPPENNED!");
void
pmap_zero_page(vm_page_t m)
{
vm_offset_t phys = VM_PAGE_TO_PHYS(m);
vm_offset_t phys;
phys = VM_PAGE_TO_PHYS(m);
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
@ -2670,14 +2707,13 @@ pmap_zero_page(vm_page_t m)
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
vm_offset_t phys = VM_PAGE_TO_PHYS(m);
vm_offset_t phys;
phys = VM_PAGE_TO_PHYS(m);
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
invltlb_1pg((vm_offset_t)CADDR2);
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
@ -2696,20 +2732,13 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
void
pmap_zero_page_idle(vm_page_t m)
{
vm_offset_t phys = VM_PAGE_TO_PHYS(m);
vm_offset_t phys;
phys = VM_PAGE_TO_PHYS(m);
if (*CMAP3)
panic("pmap_zero_page: CMAP3 busy");
*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
#ifdef SMP
mtx_lock(&Giant); /* IPI sender not MPSAFE */
#endif
invltlb_1pg((vm_offset_t)CADDR3);
#ifdef SMP
mtx_unlock(&Giant);
#endif
invlpg((vm_offset_t)CADDR3); /* SMP: local cpu only */
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR3);
@ -2733,18 +2762,15 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
panic("pmap_copy_page: CMAP1 busy");
if (*CMAP2)
panic("pmap_copy_page: CMAP2 busy");
*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
#ifdef I386_CPU
invltlb();
#else
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
#endif
/*
* XXX we "know" that CADDR2 immediately follows CADDR1 and use
* that to save an IPI on SMP systems.
*/
pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
(vm_offset_t)CADDR2 + PAGE_SIZE);
bcopy(CADDR1, CADDR2, PAGE_SIZE);
*CMAP1 = 0;
*CMAP2 = 0;
}
@ -3176,18 +3202,11 @@ pmap_mapdev(pa, size)
for (tmpva = va; size > 0; ) {
pte = vtopte(tmpva);
*pte = pa | PG_RW | PG_V | pgeflag;
#ifdef SMP
cpu_invlpg((void *)tmpva);
#else
invltlb_1pg(tmpva);
#endif
size -= PAGE_SIZE;
tmpva += PAGE_SIZE;
pa += PAGE_SIZE;
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, va, tmpva);
return ((void *)(va + offset));
}
@ -3205,15 +3224,8 @@ pmap_unmapdev(va, size)
for (tmpva = base; tmpva < (base + size); tmpva += PAGE_SIZE) {
pte = vtopte(tmpva);
*pte = 0;
#ifdef SMP
cpu_invlpg((void *)tmpva);
#else
invltlb_1pg(tmpva);
#endif
}
#ifdef SMP
smp_invltlb();
#endif
pmap_invalidate_range(kernel_pmap, va, tmpva);
kmem_free(kernel_map, base, size);
}

View File

@ -1596,42 +1596,6 @@ ENTRY(ssdtosd)
popl %ebx
ret
/* load_cr0(cr0) */
ENTRY(load_cr0)
movl 4(%esp),%eax
movl %eax,%cr0
ret
/* rcr0() */
ENTRY(rcr0)
movl %cr0,%eax
ret
/* rcr3() */
ENTRY(rcr3)
movl %cr3,%eax
ret
/* void load_cr3(caddr_t cr3) */
ENTRY(load_cr3)
#ifdef SWTCH_OPTIM_STATS
incl tlb_flush_count
#endif
movl 4(%esp),%eax
movl %eax,%cr3
ret
/* rcr4() */
ENTRY(rcr4)
movl %cr4,%eax
ret
/* void load_cr4(caddr_t cr4) */
ENTRY(load_cr4)
movl 4(%esp),%eax
movl %eax,%cr4
ret
/* void reset_dbregs() */
ENTRY(reset_dbregs)
movl $0,%eax

View File

@ -603,6 +603,7 @@ vm86_datacall(intnum, vmf, vmc)
entry = vmc->pmap[i].pte_num;
vmc->pmap[i].old_pte = pte[entry];
pte[entry] = page | PG_V | PG_RW | PG_U;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
}
vmf->vmf_trapno = intnum;
@ -611,6 +612,7 @@ vm86_datacall(intnum, vmf, vmc)
for (i = 0; i < vmc->npages; i++) {
entry = vmc->pmap[i].pte_num;
pte[entry] = vmc->pmap[i].old_pte;
pmap_invalidate_page(kernel_pmap, vmc->pmap[i].kva);
}
mtx_unlock(&vm86_lock);

View File

@ -237,62 +237,6 @@ invd(void)
__asm __volatile("invd");
}
#if defined(SMP) && defined(_KERNEL)
/*
* When using APIC IPI's, invlpg() is not simply the invlpg instruction
* (this is a bug) and the inlining cost is prohibitive since the call
* executes into the IPI transmission system.
*/
void invlpg(u_int addr);
void invltlb(void);
static __inline void
cpu_invlpg(void *addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
cpu_invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
#else /* !(SMP && _KERNEL) */
static __inline void
invlpg(u_int addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
static __inline void
invltlb(void)
{
u_int temp;
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3()
* is inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3" : "=r" (temp)
: : "memory");
#ifdef SWTCH_OPTIM_STATS
++tlb_flush_count;
#endif
}
#endif /* SMP && _KERNEL */
static __inline u_short
inw(u_int port)
{
@ -363,15 +307,6 @@ ia32_pause(void)
__asm __volatile("pause");
}
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline u_int
read_eflags(void)
{
@ -426,6 +361,86 @@ wrmsr(u_int msr, u_int64_t newval)
__asm __volatile("wrmsr" : : "A" (newval), "c" (msr));
}
static __inline void
load_cr0(u_int data)
{
__asm __volatile("movl %0,%%cr0" : : "r" (data));
}
static __inline u_int
rcr0(void)
{
u_int data;
__asm __volatile("movl %%cr0,%0" : "=r" (data));
return (data);
}
static __inline u_int
rcr2(void)
{
u_int data;
__asm __volatile("movl %%cr2,%0" : "=r" (data));
return (data);
}
static __inline void
load_cr3(u_int data)
{
__asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory");
#if defined(SWTCH_OPTIM_STATS)
++tlb_flush_count;
#endif
}
static __inline u_int
rcr3(void)
{
u_int data;
__asm __volatile("movl %%cr3,%0" : "=r" (data));
return (data);
}
static __inline void
load_cr4(u_int data)
{
__asm __volatile("movl %0,%%cr4" : : "r" (data));
}
static __inline u_int
rcr4(void)
{
u_int data;
__asm __volatile("movl %%cr4,%0" : "=r" (data));
return (data);
}
/*
* Global TLB flush (except for those for pages marked PG_G)
*/
static __inline void
invltlb(void)
{
load_cr3(rcr3());
}
/*
* TLB flush for an individual page (even if it has PG_G).
* Only works on 486+ CPUs (i386 does not have PG_G).
*/
static __inline void
invlpg(u_int addr)
{
__asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory");
}
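A sketch of the rule the two comments above state (illustration only; the function is hypothetical, and PG_G/pt_entry_t come from <machine/pmap.h>, which is assumed to be visible): an entry that may be global has to be flushed with invlpg(), because invltlb() deliberately leaves PG_G entries alone.

static __inline void
demo_flush_pte(vm_offset_t va, pt_entry_t oldpte)
{

        if (oldpte & PG_G)
                invlpg(va);     /* global entries survive the %cr3 reload */
        else
                invltlb();      /* heavy-handed but sufficient for the rest */
}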
static __inline u_int
rfs(void)
{
@ -587,6 +602,8 @@ intr_restore(register_t eflags)
int breakpoint(void);
u_int bsfl(u_int mask);
u_int bsrl(u_int mask);
void cpu_invlpg(u_int addr);
void cpu_invlpg_range(u_int start, u_int end);
void disable_intr(void);
void do_cpuid(u_int ax, u_int *p);
void enable_intr(void);
@ -597,8 +614,14 @@ void insl(u_int port, void *addr, size_t cnt);
void insw(u_int port, void *addr, size_t cnt);
void invd(void);
void invlpg(u_int addr);
void invlpg_range(u_int start, u_int end);
void invltlb(void);
u_short inw(u_int port);
void load_cr0(u_int cr0);
void load_cr3(u_int cr3);
void load_cr4(u_int cr4);
void load_fs(u_int sel);
void load_gs(u_int sel);
void outb(u_int port, u_char data);
void outl(u_int port, u_int data);
void outsb(u_int port, void *addr, size_t cnt);
@ -606,7 +629,12 @@ void outsl(u_int port, void *addr, size_t cnt);
void outsw(u_int port, void *addr, size_t cnt);
void outw(u_int port, u_short data);
void ia32_pause(void);
u_int rcr0(void);
u_int rcr2(void);
u_int rcr3(void);
u_int rcr4(void);
u_int rfs(void);
u_int rgs(void);
u_int64_t rdmsr(u_int msr);
u_int64_t rdpmc(u_int pmc);
u_int64_t rdtsc(void);
@ -614,10 +642,6 @@ u_int read_eflags(void);
void wbinvd(void);
void write_eflags(u_int ef);
void wrmsr(u_int msr, u_int64_t newval);
u_int rfs(void);
u_int rgs(void);
void load_fs(u_int sel);
void load_gs(u_int sel);
u_int rdr0(void);
void load_dr0(u_int dr0);
u_int rdr1(void);
@ -639,13 +663,7 @@ void intr_restore(register_t ef);
#endif /* __GNUC__ */
void load_cr0(u_int cr0);
void load_cr3(u_int cr3);
void load_cr4(u_int cr4);
void ltr(u_short sel);
u_int rcr0(void);
u_int rcr3(void);
u_int rcr4(void);
void reset_dbregs(void);
__END_DECLS

View File

@ -288,6 +288,14 @@ extern pt_entry_t *SMPpt;
struct pcb stoppcbs[MAXCPU];
#ifdef APIC_IO
/* Variables needed for SMP tlb shootdown. */
vm_offset_t smp_tlb_addr1;
vm_offset_t smp_tlb_addr2;
volatile int smp_tlb_wait;
static struct mtx smp_tlb_mtx;
#endif
/*
* Local data and functions.
*/
@ -336,6 +344,9 @@ init_locks(void)
#ifdef USE_COMLOCK
mtx_init(&com_mtx, "com", NULL, MTX_SPIN);
#endif /* USE_COMLOCK */
#ifdef APIC_IO
mtx_init(&smp_tlb_mtx, "tlb", NULL, MTX_SPIN);
#endif
}
/*
@ -605,6 +616,10 @@ mp_enable(u_int boot_addr)
/* install an inter-CPU IPI for TLB invalidation */
setidt(XINVLTLB_OFFSET, Xinvltlb,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLPG_OFFSET, Xinvlpg,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
setidt(XINVLRNG_OFFSET, Xinvlrng,
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* install an inter-CPU IPI for forwarding hardclock() */
setidt(XHARDCLOCK_OFFSET, Xhardclock,
@ -2190,48 +2205,237 @@ start_ap(int logical_cpu, u_int boot_addr)
return 0; /* return FAILURE */
}
#if defined(APIC_IO) && defined(COUNT_XINVLTLB_HITS)
u_int xhits[MAXCPU];
SYSCTL_OPAQUE(_debug, OID_AUTO, xhits, CTLFLAG_RW, &xhits, sizeof(xhits),
"IU", "");
#if defined(APIC_IO)
#ifdef COUNT_XINVLTLB_HITS
u_int xhits_gbl[MAXCPU];
u_int xhits_pg[MAXCPU];
u_int xhits_rng[MAXCPU];
SYSCTL_NODE(_debug, OID_AUTO, xhits, CTLFLAG_RW, 0, "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, global, CTLFLAG_RW, &xhits_gbl,
sizeof(xhits_gbl), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, page, CTLFLAG_RW, &xhits_pg,
sizeof(xhits_pg), "IU", "");
SYSCTL_OPAQUE(_debug_xhits, OID_AUTO, range, CTLFLAG_RW, &xhits_rng,
sizeof(xhits_rng), "IU", "");
u_int ipi_global;
u_int ipi_page;
u_int ipi_range;
u_int ipi_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_global, CTLFLAG_RW, &ipi_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_page, CTLFLAG_RW, &ipi_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range, CTLFLAG_RW, &ipi_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_range_size, CTLFLAG_RW, &ipi_range_size,
0, "");
u_int ipi_masked_global;
u_int ipi_masked_page;
u_int ipi_masked_range;
u_int ipi_masked_range_size;
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_global, CTLFLAG_RW,
&ipi_masked_global, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_page, CTLFLAG_RW,
&ipi_masked_page, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range, CTLFLAG_RW,
&ipi_masked_range, 0, "");
SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
&ipi_masked_range_size, 0, "");
#endif
/*
* Flush the TLB on all other CPUs
*
* XXX: Needs to handshake and wait for completion before proceeding.
*/
static void
smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
u_int ncpu;
register_t eflags;
ncpu = mp_ncpus - 1; /* does not shootdown self */
if (ncpu < 1)
return; /* no other cpus */
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
ipi_all_but_self(vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
/*
* This is about as magic as it gets. fortune(1) has got similar code
* for reversing bits in a word. Who thinks up this stuff??
*
* Yes, it does appear to be consistently faster than:
* while (i = ffs(m)) {
* m >>= i;
* bits++;
* }
* and
* while (lsb = (m & -m)) { // This is magic too
* m &= ~lsb; // or: m ^= lsb
* bits++;
* }
* Both of these latter forms do some very strange things on gcc-3.1 with
* -mcpu=pentiumpro and/or -march=pentiumpro and/or -O or -O2.
* There is probably an SSE or MMX popcnt instruction.
*
* I wonder if this should be in libkern?
*
* XXX Stop the presses! Another one:
* static __inline u_int32_t
* popcnt1(u_int32_t v)
* {
* v -= ((v >> 1) & 0x55555555);
* v = (v & 0x33333333) + ((v >> 2) & 0x33333333);
* v = (v + (v >> 4)) & 0x0F0F0F0F;
* return (v * 0x01010101) >> 24;
* }
* The downside is that it has a multiply. With a pentium3 with
* -mcpu=pentiumpro and -march=pentiumpro then gcc-3.1 will use
* an imull, and in that case it is faster. In most other cases
* it appears slightly slower.
*/
static __inline u_int32_t
popcnt(u_int32_t m)
{
m = (m & 0x55555555) + ((m & 0xaaaaaaaa) >> 1);
m = (m & 0x33333333) + ((m & 0xcccccccc) >> 2);
m = (m & 0x0f0f0f0f) + ((m & 0xf0f0f0f0) >> 4);
m = (m & 0x00ff00ff) + ((m & 0xff00ff00) >> 8);
m = (m & 0x0000ffff) + ((m & 0xffff0000) >> 16);
return m;
}
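As a concrete illustration (not part of the diff), tracing the folds on a small cpu mask such as 0x16 (binary 10110, i.e. cpus 1, 2 and 4 set):

	/*
	 * popcnt(0x16):
	 *   pair fold:   (0x16 & 0x55555555) + ((0x16 & 0xaaaaaaaa) >> 1) == 0x15
	 *   nibble fold: (0x15 & 0x33333333) + ((0x15 & 0xcccccccc) >> 2) == 0x12
	 *   byte fold:   (0x12 & 0x0f0f0f0f) + ((0x12 & 0xf0f0f0f0) >> 4) == 0x03
	 *   the remaining two folds leave 0x03, so popcnt(0x16) == 3
	 */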
static void
smp_targeted_tlb_shootdown(u_int mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
{
int ncpu, othercpus;
register_t eflags;
othercpus = mp_ncpus - 1;
if (mask == (u_int)-1) {
ncpu = othercpus;
if (ncpu < 1)
return;
} else {
/* XXX there should be a pcpu self mask */
mask &= ~(1 << PCPU_GET(cpuid));
if (mask == 0)
return;
ncpu = popcnt(mask);
if (ncpu > othercpus) {
/* XXX this should be a panic offence */
printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
ncpu, othercpus);
ncpu = othercpus;
}
/* XXX should be a panic, implied by mask == 0 above */
if (ncpu < 1)
return;
}
eflags = read_eflags();
if ((eflags & PSL_I) == 0)
panic("absolutely cannot call smp_targeted_ipi_shootdown with interrupts already disabled");
mtx_lock_spin(&smp_tlb_mtx);
smp_tlb_addr1 = addr1;
smp_tlb_addr2 = addr2;
atomic_store_rel_int(&smp_tlb_wait, 0);
if (mask == (u_int)-1)
ipi_all_but_self(vector);
else
ipi_selected(mask, vector);
while (smp_tlb_wait < ncpu)
ia32_pause();
mtx_unlock_spin(&smp_tlb_mtx);
}
#endif
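For context, here is a sketch of how a pmap-level caller can use pm_active, as described in the commit message, to avoid IPIs when a pmap is only active on the local cpu. This is illustrative only: the real pmap_invalidate_page() body lives in the pmap.c part of this commit, invlpg() is the plain single-cpu flush from cpufunc.h, and kernel_pmap/all_cpus are assumed to have their usual meanings.

void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{
	u_int cpumask, other_cpus;

	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
		invlpg(va);		/* flush the local TLB entry */
		smp_invlpg(va);		/* ... and every other cpu's */
	} else {
		cpumask = PCPU_GET(cpumask);
		other_cpus = PCPU_GET(other_cpus);
		if (pmap->pm_active & cpumask)
			invlpg(va);	/* we may have a stale local entry */
		if (pmap->pm_active & other_cpus)
			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
	}
}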
void
smp_invltlb(void)
{
#if defined(APIC_IO)
if (smp_started)
ipi_all_but_self(IPI_INVLTLB);
if (smp_started) {
smp_tlb_shootdown(IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_global++;
#endif
}
#endif /* APIC_IO */
}
void
invlpg(u_int addr)
smp_invlpg(vm_offset_t addr)
{
__asm __volatile("invlpg (%0)"::"r"(addr):"memory");
/* send a message to the other CPUs */
smp_invltlb();
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_page++;
#endif
}
#endif /* APIC_IO */
}
void
invltlb(void)
smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
{
u_long temp;
#if defined(APIC_IO)
if (smp_started) {
smp_tlb_shootdown(IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_range++;
ipi_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
/*
* This should be implemented as load_cr3(rcr3()) when load_cr3() is
* inlined.
*/
__asm __volatile("movl %%cr3, %0; movl %0, %%cr3":"=r"(temp) :: "memory");
void
smp_masked_invltlb(u_int mask)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLTLB, 0, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_global++;
#endif
}
#endif /* APIC_IO */
}
/* send a message to the other CPUs */
smp_invltlb();
void
smp_masked_invlpg(u_int mask, vm_offset_t addr)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLPG, addr, 0);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_page++;
#endif
}
#endif /* APIC_IO */
}
void
smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2)
{
#if defined(APIC_IO)
if (smp_started) {
smp_targeted_tlb_shootdown(mask, IPI_INVLRNG, addr1, addr2);
#ifdef COUNT_XINVLTLB_HITS
ipi_masked_range++;
ipi_masked_range_size += (addr2 - addr1) / PAGE_SIZE;
#endif
}
#endif /* APIC_IO */
}
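The point of the range primitives above is that an entire run of pages costs a single IPI; the per-page invlpg work happens in the Xinvlrng handler on each target cpu. A caller-side sketch in the same illustrative spirit as the one above (not the actual pmap.c hunk):

void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t va;
	u_int other_cpus;

	if (pmap->pm_active & PCPU_GET(cpumask))
		for (va = sva; va < eva; va += PAGE_SIZE)
			invlpg(va);	/* local flush, one page at a time */
	other_cpus = pmap->pm_active & PCPU_GET(other_cpus);
	if (other_cpus != 0)
		smp_masked_invlpg_range(other_cpus, sva, eva);	/* one IPI total */
}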
@ -2251,7 +2455,7 @@ ap_init(void)
/* spin */ ;
/* BSP may have changed PTD while we were waiting */
cpu_invltlb();
invltlb();
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
lidt(&r_idt);
@ -2290,6 +2494,9 @@ ap_init(void)
/* Build our map of 'other' CPUs. */
PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
if (bootverbose)
apic_dump("ap_init()");
printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
if (smp_cpus == mp_ncpus) {
@ -2325,7 +2532,8 @@ forwarded_statclock(struct trapframe frame)
{
mtx_lock_spin(&sched_lock);
statclock_process(curthread->td_kse, TRAPF_PC(&frame), TRAPF_USERMODE(&frame));
statclock_process(curthread->td_kse, TRAPF_PC(&frame),
TRAPF_USERMODE(&frame));
mtx_unlock_spin(&sched_lock);
}
View File
@ -151,7 +151,7 @@ extern pt_entry_t PTmap[], APTmap[];
extern pd_entry_t PTD[], APTD[];
extern pd_entry_t PTDpde, APTDpde;
extern pd_entry_t IdlePTD; /* physical address of "Idle" state directory */
extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
#endif
#ifdef _KERNEL
@ -253,14 +253,15 @@ extern char *ptvmmap; /* poor name! */
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
void pmap_bootstrap( vm_offset_t, vm_offset_t);
void pmap_bootstrap(vm_offset_t, vm_offset_t);
void *pmap_mapdev(vm_offset_t, vm_size_t);
void pmap_unmapdev(vm_offset_t, vm_size_t);
pt_entry_t *pmap_pte(pmap_t, vm_offset_t) __pure2;
vm_page_t pmap_use_pt(pmap_t, vm_offset_t);
#ifdef SMP
void pmap_set_opt(void);
#endif
void pmap_invalidate_page(pmap_t, vm_offset_t);
void pmap_invalidate_range(pmap_t, vm_offset_t, vm_offset_t);
void pmap_invalidate_all(pmap_t);
#endif /* _KERNEL */
View File
@ -51,6 +51,8 @@ extern int current_postcode; /** XXX currently in mp_machdep.c */
* Interprocessor interrupts for SMP.
*/
#define IPI_INVLTLB XINVLTLB_OFFSET
#define IPI_INVLPG XINVLPG_OFFSET
#define IPI_INVLRNG XINVLRNG_OFFSET
#define IPI_RENDEZVOUS XRENDEZVOUS_OFFSET
#define IPI_AST XCPUAST_OFFSET
#define IPI_STOP XCPUSTOP_OFFSET
@ -107,7 +109,6 @@ void assign_apic_irq(int apic, int intpin, int irq);
void revoke_apic_irq(int irq);
void bsp_apic_configure(void);
void init_secondary(void);
void smp_invltlb(void);
void forward_statclock(void);
void forwarded_statclock(struct trapframe frame);
void forward_hardclock(void);
@ -119,6 +120,13 @@ void ipi_self(u_int ipi);
#ifdef APIC_INTR_REORDER
void set_lapic_isrloc(int, int);
#endif /* APIC_INTR_REORDER */
void smp_invlpg(vm_offset_t addr);
void smp_masked_invlpg(u_int mask, vm_offset_t addr);
void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
void smp_masked_invlpg_range(u_int mask, vm_offset_t startva,
vm_offset_t endva);
void smp_invltlb(void);
void smp_masked_invltlb(u_int mask);
/* global data in mpapic.c */
extern volatile lapic_t lapic;
View File
@ -260,30 +260,107 @@ Xspuriousint:
iret
/*
* Handle TLB shootdowns.
* Global address space TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvltlb
Xinvltlb:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
ss
incl xhits(,%eax,4)
incl xhits_gbl(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl %cr3, %eax /* invalidate the TLB */
movl %eax, %cr3
ss /* stack segment, avoid %ds load */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Single page TLB shootdown
*/
.text
SUPERALIGN_TEXT
.globl Xinvlpg
Xinvlpg:
pushl %eax
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
incl xhits_pg(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %eax
invlpg (%eax) /* invalidate single page */
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %eax
iret
/*
* Page range TLB shootdown.
*/
.text
SUPERALIGN_TEXT
.globl Xinvlrng
Xinvlrng:
pushl %eax
pushl %edx
pushl %ds
movl $KDSEL, %eax /* Kernel data selector */
mov %ax, %ds
#ifdef COUNT_XINVLTLB_HITS
pushl %fs
movl $KPSEL, %eax /* Private space selector */
mov %ax, %fs
movl PCPU(CPUID), %eax
popl %fs
incl xhits_rng(,%eax,4)
#endif /* COUNT_XINVLTLB_HITS */
movl smp_tlb_addr1, %edx
movl smp_tlb_addr2, %eax
1: invlpg (%edx) /* invalidate single page */
addl $PAGE_SIZE, %edx
cmpl %eax, %edx /* more pages left in the range? */
jb 1b
movl $0, lapic+LA_EOI /* End Of Interrupt to APIC */
lock
incl smp_tlb_wait
popl %ds
popl %edx
popl %eax
iret
View File
@ -88,6 +88,7 @@
/* IDT vector base for regular (aka. slow) and fast interrupts */
#define TPR_SLOW_INTS 0x20
#define TPR_FAST_INTS 0x60
/* XXX note that the AST interrupt is at 0x50 */
/* blocking values for local APIC Task Priority Register */
#define TPR_BLOCK_HWI 0x4f /* hardware INTs */
@ -104,20 +105,23 @@
#endif /** TEST_TEST1 */
/* TLB shootdowns */
#define XINVLTLB_OFFSET (ICU_OFFSET + 112)
#define XINVLTLB_OFFSET (ICU_OFFSET + 112) /* 0x90 */
#define XINVLPG_OFFSET (ICU_OFFSET + 113) /* 0x91 */
#define XINVLRNG_OFFSET (ICU_OFFSET + 114) /* 0x92 */
/* inter-cpu clock handling */
#define XHARDCLOCK_OFFSET (ICU_OFFSET + 113)
#define XSTATCLOCK_OFFSET (ICU_OFFSET + 114)
#define XHARDCLOCK_OFFSET (ICU_OFFSET + 120) /* 0x98 */
#define XSTATCLOCK_OFFSET (ICU_OFFSET + 121) /* 0x99 */
/* inter-CPU rendezvous */
#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 115)
#define XRENDEZVOUS_OFFSET (ICU_OFFSET + 122) /* 0x9A */
/* IPI to generate an additional software trap at the target CPU */
#define XCPUAST_OFFSET (ICU_OFFSET + 48)
/* XXX in the middle of the interrupt range, overlapping IRQ48 */
#define XCPUAST_OFFSET (ICU_OFFSET + 48) /* 0x50 */
/* IPI to signal CPUs to stop and wait for another CPU to restart them */
#define XCPUSTOP_OFFSET (ICU_OFFSET + 128)
#define XCPUSTOP_OFFSET (ICU_OFFSET + 128) /* 0xA0 */
/*
* Note: this vector MUST be xxxx1111, 32 + 223 = 255 = 0xff:
@ -194,7 +198,9 @@ inthand_t
IDTVEC(intr28), IDTVEC(intr29), IDTVEC(intr30), IDTVEC(intr31);
inthand_t
Xinvltlb, /* TLB shootdowns */
Xinvltlb, /* TLB shootdowns - global */
Xinvlpg, /* TLB shootdowns - 1 page */
Xinvlrng, /* TLB shootdowns - page range */
Xhardclock, /* Forward hardclock() */
Xstatclock, /* Forward statclock() */
Xcpuast, /* Additional software trap on other cpu */
View File
@ -223,6 +223,9 @@ static struct witness_order_list_entry order_lists[] = {
{ "icu", &lock_class_mtx_spin },
#ifdef SMP
{ "smp rendezvous", &lock_class_mtx_spin },
#if defined(__i386__) && defined(APIC_IO)
{ "tlb", &lock_class_mtx_spin },
#endif
#endif
{ "clk", &lock_class_mtx_spin },
{ "mutex profiling lock", &lock_class_mtx_spin },