Re-enable the idle page-zeroing code. Remove all IPIs from the idle

page-zeroing code as well as from the general page-zeroing code and use a
lazy TLB page-invalidation scheme based on a callback made at the end
of mi_switch.

A number of people came up with this idea at the same time so credit
belongs to Peter, John, and Jake as well.

Two-way SMP buildworld -j 5 tests (second run, after stabilization)
    2282.76 real  2515.17 user  704.22 sys	before peter's IPI commit
    2266.69 real  2467.50 user  633.77 sys	after peter's commit
    2232.80 real  2468.99 user  615.89 sys	after this commit

Reviewed by:	peter, jhb
Approved by:	peter
This commit is contained in:
dillon 2002-07-12 20:17:06 +00:00
parent c5495020cb
commit dc5d856e71
7 changed files with 142 additions and 22 deletions

View File

@ -89,6 +89,7 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
/* Offset of the new td_switchin switch-in callback (see struct thread). */
ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin));
ASSYM(TD_MD, offsetof(struct thread, td_md));
ASSYM(P_MD, offsetof(struct proc, p_md));

View File

@ -2675,6 +2675,38 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
}
}
#ifdef SMP
/*
* pmap_zpi_switchin*()
*
* These functions allow us to avoid doing IPIs altogether in certain
* temporary page-mapping situations (page zeroing). Instead, to deal
* with being preempted and moved onto a different CPU, we invalidate
* the page when the scheduler switches us in. This does not occur
* very often so we remain relatively optimal with very little effort.
*/
/*
 * Switch-in callback: re-invalidate the local TLB entries for CADDR1 and
 * CADDR2.  Installed in curthread->td_switchin by pmap_copy_page() so the
 * temporary CMAP1/CMAP2 mappings stay correct if we migrate CPUs.
 */
static void
pmap_zpi_switchin12(void)
{
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
}
/*
 * Switch-in callback: re-invalidate the local TLB entry for CADDR2.
 * Installed by pmap_zero_page() and pmap_zero_page_area() while CMAP2
 * holds their temporary mapping.
 */
static void
pmap_zpi_switchin2(void)
{
invlpg((u_int)CADDR2);
}
/*
 * Switch-in callback: re-invalidate the local TLB entry for CADDR3.
 * Installed by pmap_zero_page_idle() while CMAP3 holds its temporary
 * mapping.
 */
static void
pmap_zpi_switchin3(void)
{
invlpg((u_int)CADDR3);
}
#endif
/*
* pmap_zero_page zeros the specified hardware page by mapping
* the page into KVM and using bzero to clear its contents.
@ -2688,13 +2720,19 @@ pmap_zero_page(vm_page_t m)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin2;
#endif
invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
else
#endif
bzero(CADDR2, PAGE_SIZE);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP2 = 0;
}
@ -2713,13 +2751,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin2;
#endif
invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
else
#endif
bzero((char *)CADDR2 + off, size);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP2 = 0;
}
@ -2738,13 +2782,19 @@ pmap_zero_page_idle(vm_page_t m)
if (*CMAP3)
panic("pmap_zero_page: CMAP3 busy");
*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
invlpg((vm_offset_t)CADDR3); /* SMP: local cpu only */
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin3;
#endif
invlpg((u_int)CADDR3);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR3);
else
#endif
bzero(CADDR3, PAGE_SIZE);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP3 = 0;
}
@ -2764,13 +2814,20 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
panic("pmap_copy_page: CMAP2 busy");
*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
/*
* XXX we "know" that CADDR2 immediately follows CADDR1 and use
* that to save an IPI on SMP systems.
*/
pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
(vm_offset_t)CADDR2 + PAGE_SIZE);
#ifdef I386_CPU
invltlb();
#else
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin12;
#endif
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
#endif
bcopy(CADDR1, CADDR2, PAGE_SIZE);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP1 = 0;
*CMAP2 = 0;
}

View File

@ -89,6 +89,7 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
ASSYM(TD_PROC, offsetof(struct thread, td_proc));
ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
/* Offset of the new td_switchin switch-in callback (see struct thread). */
ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin));
ASSYM(TD_MD, offsetof(struct thread, td_md));
ASSYM(P_MD, offsetof(struct proc, p_md));

View File

@ -2675,6 +2675,38 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
}
}
#ifdef SMP
/*
* pmap_zpi_switchin*()
*
* These functions allow us to avoid doing IPIs altogether in certain
* temporary page-mapping situations (page zeroing). Instead, to deal
* with being preempted and moved onto a different CPU, we invalidate
* the page when the scheduler switches us in. This does not occur
* very often so we remain relatively optimal with very little effort.
*/
/*
 * Switch-in callback: re-invalidate the local TLB entries for CADDR1 and
 * CADDR2.  Installed in curthread->td_switchin by pmap_copy_page() so the
 * temporary CMAP1/CMAP2 mappings stay correct if we migrate CPUs.
 */
static void
pmap_zpi_switchin12(void)
{
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
}
/*
 * Switch-in callback: re-invalidate the local TLB entry for CADDR2.
 * Installed by pmap_zero_page() and pmap_zero_page_area() while CMAP2
 * holds their temporary mapping.
 */
static void
pmap_zpi_switchin2(void)
{
invlpg((u_int)CADDR2);
}
/*
 * Switch-in callback: re-invalidate the local TLB entry for CADDR3.
 * Installed by pmap_zero_page_idle() while CMAP3 holds its temporary
 * mapping.
 */
static void
pmap_zpi_switchin3(void)
{
invlpg((u_int)CADDR3);
}
#endif
/*
* pmap_zero_page zeros the specified hardware page by mapping
* the page into KVM and using bzero to clear its contents.
@ -2688,13 +2720,19 @@ pmap_zero_page(vm_page_t m)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin2;
#endif
invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR2);
else
#endif
bzero(CADDR2, PAGE_SIZE);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP2 = 0;
}
@ -2713,13 +2751,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
if (*CMAP2)
panic("pmap_zero_page: CMAP2 busy");
*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin2;
#endif
invlpg((u_int)CADDR2);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
i686_pagezero(CADDR2);
else
#endif
bzero((char *)CADDR2 + off, size);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP2 = 0;
}
@ -2738,13 +2782,19 @@ pmap_zero_page_idle(vm_page_t m)
if (*CMAP3)
panic("pmap_zero_page: CMAP3 busy");
*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
invlpg((vm_offset_t)CADDR3); /* SMP: local cpu only */
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin3;
#endif
invlpg((u_int)CADDR3);
#if defined(I686_CPU)
if (cpu_class == CPUCLASS_686)
i686_pagezero(CADDR3);
else
#endif
bzero(CADDR3, PAGE_SIZE);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP3 = 0;
}
@ -2764,13 +2814,20 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
panic("pmap_copy_page: CMAP2 busy");
*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
/*
* XXX we "know" that CADDR2 immediately follows CADDR1 and use
* that to save an IPI on SMP systems.
*/
pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
(vm_offset_t)CADDR2 + PAGE_SIZE);
#ifdef I386_CPU
invltlb();
#else
#ifdef SMP
curthread->td_switchin = pmap_zpi_switchin12;
#endif
invlpg((u_int)CADDR1);
invlpg((u_int)CADDR2);
#endif
bcopy(CADDR1, CADDR2, PAGE_SIZE);
#ifdef SMP
curthread->td_switchin = NULL;
#endif
*CMAP1 = 0;
*CMAP2 = 0;
}

View File

@ -892,6 +892,13 @@ mi_switch()
if (PCPU_GET(switchtime.sec) == 0)
binuptime(PCPU_PTR(switchtime));
PCPU_SET(switchticks, ticks);
/*
* Call the switchin function while still holding the scheduler lock
* (used by the idlezero code and the general page-zeroing code)
*/
if (td->td_switchin)
td->td_switchin();
}
/*

View File

@ -285,6 +285,7 @@ struct thread {
int td_intr_nesting_level; /* (k) Interrupt recursion. */
void *td_mailbox; /* the userland mailbox address */
struct ucred *td_ucred; /* (k) Reference to credentials. */
void (*td_switchin)(void); /* (k) switchin special func */
#define td_endzero td_md
#define td_startcopy td_endzero

View File

@ -30,11 +30,7 @@ static int cnt_prezero;
SYSCTL_INT(_vm_stats_misc, OID_AUTO,
cnt_prezero, CTLFLAG_RD, &cnt_prezero, 0, "");
#ifdef SMP
static int idlezero_enable = 0;
#else
static int idlezero_enable = 1;
#endif
SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0, "");
TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);