From dc5d856e710c7a79039e46c89de1624670a57c92 Mon Sep 17 00:00:00 2001
From: dillon
Date: Fri, 12 Jul 2002 20:17:06 +0000
Subject: [PATCH] Re-enable the idle page-zeroing code. Remove all IPIs from
 the idle page-zeroing code as well as from the general page-zeroing code and
 use a lazy TLB page invalidation scheme based on a callback made at the end
 of mi_switch.

A number of people came up with this idea at the same time so credit
belongs to Peter, John, and Jake as well.

Two-way SMP buildworld -j 5 tests (second run, after stabilization)

    2282.76 real 2515.17 user 704.22 sys    before peter's IPI commit
    2266.69 real 2467.50 user 633.77 sys    after peter's commit
    2232.80 real 2468.99 user 615.89 sys    after this commit

Reviewed by: peter, jhb
Approved by: peter
---
 sys/amd64/amd64/genassym.c |  1 +
 sys/amd64/amd64/pmap.c     | 75 +++++++++++++++++++++++++++++++++-----
 sys/i386/i386/genassym.c   |  1 +
 sys/i386/i386/pmap.c       | 75 +++++++++++++++++++++++++++++++++-----
 sys/kern/kern_synch.c      |  7 ++++
 sys/sys/proc.h             |  1 +
 sys/vm/vm_zeroidle.c       |  4 --
 7 files changed, 142 insertions(+), 22 deletions(-)

diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index ee890dbd8628..06c87f004204 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -89,6 +89,7 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
 ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
+ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin));
 ASSYM(TD_MD, offsetof(struct thread, td_md));
 ASSYM(P_MD, offsetof(struct proc, p_md));
 
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5de170703aef..1a85a52eae7d 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -2675,6 +2675,38 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 	}
 }
 
+#ifdef SMP
+
+/*
+ * pmap_zpi_switchin*()
+ *
+ * These functions allow us to avoid doing IPIs altogether in certain
+ * temporary page-mapping situations (page zeroing).  Instead, to deal
+ * with being preempted and moved onto a different cpu, we invalidate
+ * the page when the scheduler switches us in.  This does not occur
+ * very often, so we remain relatively optimal with very little effort.
+ */
+static void
+pmap_zpi_switchin12(void)
+{
+	invlpg((u_int)CADDR1);
+	invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin2(void)
+{
+	invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin3(void)
+{
+	invlpg((u_int)CADDR3);
+}
+
+#endif
+
 /*
  * pmap_zero_page zeros the specified hardware page by mapping
  * the page into KVM and using bzero to clear its contents.
@@ -2688,13 +2720,19 @@ pmap_zero_page(vm_page_t m)
 	if (*CMAP2)
 		panic("pmap_zero_page: CMAP2 busy");
 	*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
-	pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+	invlpg((u_int)CADDR2);
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero(CADDR2, PAGE_SIZE);
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP2 = 0;
 }
 
@@ -2713,13 +2751,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
 	if (*CMAP2)
 		panic("pmap_zero_page: CMAP2 busy");
 	*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
-	pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+	invlpg((u_int)CADDR2);
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero((char *)CADDR2 + off, size);
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP2 = 0;
 }
 
@@ -2738,13 +2782,19 @@ pmap_zero_page_idle(vm_page_t m)
 	if (*CMAP3)
 		panic("pmap_zero_page: CMAP3 busy");
 	*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
-	invlpg((vm_offset_t)CADDR3);	/* SMP: local cpu only */
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin3;
+#endif
+	invlpg((u_int)CADDR3);
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686)
 		i686_pagezero(CADDR3);
 	else
 #endif
 		bzero(CADDR3, PAGE_SIZE);
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP3 = 0;
 }
 
@@ -2764,13 +2814,20 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
 		panic("pmap_copy_page: CMAP2 busy");
 	*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
 	*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
-	/*
-	 * XXX we "know" that CADDR2 immediately follows CADDR1 and use
-	 * that to save an IPI on SMP systems.
-	 */
-	pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
-	    (vm_offset_t)CADDR2 + PAGE_SIZE);
+#ifdef I386_CPU
+	invltlb();
+#else
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin12;
+#endif
+	invlpg((u_int)CADDR1);
+	invlpg((u_int)CADDR2);
+#endif
 	bcopy(CADDR1, CADDR2, PAGE_SIZE);
+
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP1 = 0;
 	*CMAP2 = 0;
 }
diff --git a/sys/i386/i386/genassym.c b/sys/i386/i386/genassym.c
index ee890dbd8628..06c87f004204 100644
--- a/sys/i386/i386/genassym.c
+++ b/sys/i386/i386/genassym.c
@@ -89,6 +89,7 @@ ASSYM(TD_KSE, offsetof(struct thread, td_kse));
 ASSYM(TD_PROC, offsetof(struct thread, td_proc));
 ASSYM(TD_INTR_NESTING_LEVEL, offsetof(struct thread, td_intr_nesting_level));
 ASSYM(TD_CRITNEST, offsetof(struct thread, td_critnest));
+ASSYM(TD_SWITCHIN, offsetof(struct thread, td_switchin));
 ASSYM(TD_MD, offsetof(struct thread, td_md));
 ASSYM(P_MD, offsetof(struct proc, p_md));
 
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 5de170703aef..1a85a52eae7d 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -2675,6 +2675,38 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
 	}
 }
 
+#ifdef SMP
+
+/*
+ * pmap_zpi_switchin*()
+ *
+ * These functions allow us to avoid doing IPIs altogether in certain
+ * temporary page-mapping situations (page zeroing).  Instead, to deal
+ * with being preempted and moved onto a different cpu, we invalidate
+ * the page when the scheduler switches us in.  This does not occur
+ * very often, so we remain relatively optimal with very little effort.
+ */
+static void
+pmap_zpi_switchin12(void)
+{
+	invlpg((u_int)CADDR1);
+	invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin2(void)
+{
+	invlpg((u_int)CADDR2);
+}
+
+static void
+pmap_zpi_switchin3(void)
+{
+	invlpg((u_int)CADDR3);
+}
+
+#endif
+
 /*
  * pmap_zero_page zeros the specified hardware page by mapping
  * the page into KVM and using bzero to clear its contents.
@@ -2688,13 +2720,19 @@ pmap_zero_page(vm_page_t m)
 	if (*CMAP2)
 		panic("pmap_zero_page: CMAP2 busy");
 	*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
-	pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+	invlpg((u_int)CADDR2);
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero(CADDR2, PAGE_SIZE);
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP2 = 0;
 }
 
@@ -2713,13 +2751,19 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
 	if (*CMAP2)
 		panic("pmap_zero_page: CMAP2 busy");
 	*CMAP2 = PG_V | PG_RW | phys | PG_A | PG_M;
-	pmap_invalidate_page(kernel_pmap, (vm_offset_t)CADDR2);
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin2;
+#endif
+	invlpg((u_int)CADDR2);
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686 && off == 0 && size == PAGE_SIZE)
 		i686_pagezero(CADDR2);
 	else
 #endif
 		bzero((char *)CADDR2 + off, size);
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP2 = 0;
 }
 
@@ -2738,13 +2782,19 @@ pmap_zero_page_idle(vm_page_t m)
 	if (*CMAP3)
 		panic("pmap_zero_page: CMAP3 busy");
 	*CMAP3 = PG_V | PG_RW | phys | PG_A | PG_M;
-	invlpg((vm_offset_t)CADDR3);	/* SMP: local cpu only */
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin3;
+#endif
+	invlpg((u_int)CADDR3);
 #if defined(I686_CPU)
 	if (cpu_class == CPUCLASS_686)
 		i686_pagezero(CADDR3);
 	else
 #endif
 		bzero(CADDR3, PAGE_SIZE);
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP3 = 0;
 }
 
@@ -2764,13 +2814,20 @@ pmap_copy_page(vm_page_t src, vm_page_t dst)
 		panic("pmap_copy_page: CMAP2 busy");
 	*CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
 	*CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
-	/*
-	 * XXX we "know" that CADDR2 immediately follows CADDR1 and use
-	 * that to save an IPI on SMP systems.
-	 */
-	pmap_invalidate_range(kernel_pmap, (vm_offset_t)CADDR1,
-	    (vm_offset_t)CADDR2 + PAGE_SIZE);
+#ifdef I386_CPU
+	invltlb();
+#else
+#ifdef SMP
+	curthread->td_switchin = pmap_zpi_switchin12;
+#endif
+	invlpg((u_int)CADDR1);
+	invlpg((u_int)CADDR2);
+#endif
 	bcopy(CADDR1, CADDR2, PAGE_SIZE);
+
+#ifdef SMP
+	curthread->td_switchin = NULL;
+#endif
 	*CMAP1 = 0;
 	*CMAP2 = 0;
 }
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 765f4b0270c4..53ab46fb8907 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -892,6 +892,13 @@ mi_switch()
 	if (PCPU_GET(switchtime.sec) == 0)
 		binuptime(PCPU_PTR(switchtime));
 	PCPU_SET(switchticks, ticks);
+
+	/*
+	 * Call the switchin function while still holding the scheduler lock
+	 * (used by the idlezero code and the general page-zeroing code).
+	 */
+	if (td->td_switchin)
+		td->td_switchin();
 }
 
 /*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index e08dbb900530..fea0805bb268 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -285,6 +285,7 @@ struct thread {
 	int	td_intr_nesting_level;	/* (k) Interrupt recursion. */
 	void	*td_mailbox;		/* the userland mailbox address */
 	struct ucred	*td_ucred;	/* (k) Reference to credentials. */
+	void	(*td_switchin)(void);	/* (k) switchin special func */
 #define	td_endzero td_md
 
 #define	td_startcopy td_endzero
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index bf35c803e649..fbf5fe477fa5 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -30,11 +30,7 @@ static int cnt_prezero;
 SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
     &cnt_prezero, 0, "");
 
-#ifdef SMP
-static int idlezero_enable = 0;
-#else
 static int idlezero_enable = 1;
-#endif
 SYSCTL_INT(_vm, OID_AUTO, idlezero_enable, CTLFLAG_RW, &idlezero_enable, 0, "");
 TUNABLE_INT("vm.idlezero_enable", &idlezero_enable);
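
--
The moving parts above are spread across four files, so here is a minimal,
self-contained userspace sketch of the lazy-invalidation pattern the patch
introduces.  It is an illustration, not kernel code: zero_page(),
mi_switch_tail(), invalidate_local(), and scratch_page are invented
stand-ins for pmap_zero_page(), the new tail of mi_switch(), invlpg(), and
the CADDR2 mapping window; only the td_switchin hook mirrors the field the
patch actually adds to struct thread.

#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Models only the one field the patch adds to struct thread. */
struct thread {
	void	(*td_switchin)(void);	/* run after being switched back in */
};

static struct thread thread0;
static struct thread *curthread = &thread0;

static char scratch_page[4096];		/* stands in for the CADDR2 window */

/* Stand-in for invlpg(): flush one TLB entry on the local cpu only. */
static void
invalidate_local(void *va)
{
	printf("invalidate local TLB entry for %p\n", va);
}

/* Analogue of pmap_zpi_switchin2(): redo the local flush after a migration. */
static void
zpi_switchin(void)
{
	invalidate_local(scratch_page);
}

/*
 * Analogue of pmap_zero_page(): arm the hook, flush locally, do the work,
 * disarm.  No IPI is broadcast; if the thread never migrates while the
 * temporary mapping is live, no other cpu ever needs the flush.
 */
static void
zero_page(void)
{
	curthread->td_switchin = zpi_switchin;
	invalidate_local(scratch_page);
	memset(scratch_page, 0, sizeof(scratch_page));
	curthread->td_switchin = NULL;
}

/* Analogue of the code this patch adds to the tail of mi_switch(). */
static void
mi_switch_tail(struct thread *td)
{
	if (td->td_switchin)
		td->td_switchin();
}

int
main(void)
{
	/* Common case: no preemption, the hook never fires. */
	zero_page();

	/* Rare case: pretend we were preempted mid-zero and migrated. */
	curthread->td_switchin = zpi_switchin;
	mi_switch_tail(curthread);	/* scheduler redoes the local flush */
	curthread->td_switchin = NULL;
	return (0);
}

The design point is that the common, unpreempted case pays only two stores
to curthread->td_switchin, while the rare migration pays one extra local
invalidation in mi_switch() instead of every mapping change paying an IPI
up front, which is consistent with the sys-time drop (633.77s to 615.89s)
in the buildworld numbers above.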