From eae8fc2c8a9d1acc07bfa247561327da5183037d Mon Sep 17 00:00:00 2001 From: Steve Passe Date: Mon, 8 Dec 1997 23:00:24 +0000 Subject: [PATCH] The improvements to clock statistics by Tor Egge Wrappered and enabled by the define BETTER_CLOCK (on by default in smpyests.h) Reviewed by: smp@csn.net Submitted by: Tor Egge --- sys/amd64/amd64/mp_machdep.c | 316 ++++++++++++++++++++++++++++++++++- sys/amd64/amd64/mptable.c | 316 ++++++++++++++++++++++++++++++++++- sys/amd64/include/mptable.h | 316 ++++++++++++++++++++++++++++++++++- sys/amd64/include/smp.h | 9 +- sys/i386/i386/mp_machdep.c | 316 ++++++++++++++++++++++++++++++++++- sys/i386/i386/mptable.c | 316 ++++++++++++++++++++++++++++++++++- sys/i386/include/asnames.h | 9 +- sys/i386/include/mptable.h | 316 ++++++++++++++++++++++++++++++++++- sys/i386/include/smp.h | 9 +- sys/i386/include/smptests.h | 23 ++- sys/kern/kern_clock.c | 21 ++- sys/kern/kern_tc.c | 21 ++- sys/kern/kern_timeout.c | 21 ++- sys/kern/subr_smp.c | 316 ++++++++++++++++++++++++++++++++++- sys/sys/smp.h | 9 +- 15 files changed, 2319 insertions(+), 15 deletions(-) diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index f1e720f22d73..632d5dddfb91 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + *astmap |= (1 << id); + } +} + +static void +forwarded_statclock(int id, int pscnt, int *astmap) +{ + struct pstats *pstats; + long rss; + struct rusage *ru; + struct vmspace *vm; + int cpustate; + struct proc *p; +#ifdef GPROF + register struct gmonparam *g; + int i; +#endif + + p = 
checkstate_curproc[id]; + cpustate = checkstate_cpustate[id]; + + switch (cpustate) { + case CHECKSTATE_USER: + if (p->p_flag & P_PROFIL) + addupc_intr_forwarded(p, id, astmap); + if (pscnt > 1) + return; + p->p_uticks++; + if (p->p_nice > NZERO) + cp_time[CP_NICE]++; + else + cp_time[CP_USER]++; + break; + case CHECKSTATE_SYS: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + + if (!p) + cp_time[CP_IDLE]++; + else { + p->p_sticks++; + cp_time[CP_SYS]++; + } + break; + case CHECKSTATE_INTR: + default: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + if (p) + p->p_iticks++; + cp_time[CP_INTR]++; + } + if (p != NULL) { + p->p_cpticks++; + if (++p->p_estcpu == 0) + p->p_estcpu--; + if ((p->p_estcpu & 3) == 0) { + resetpriority(p); + if (p->p_priority >= PUSER) + p->p_priority = p->p_usrpri; + } + + /* Update resource usage integrals and maximums. */ + if ((pstats = p->p_stats) != NULL && + (ru = &pstats->p_ru) != NULL && + (vm = p->p_vmspace) != NULL) { + ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; + ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; + ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; + rss = vm->vm_pmap.pm_stats.resident_count * + PAGE_SIZE / 1024; + if (ru->ru_maxrss < rss) + ru->ru_maxrss = rss; + } + } +} + +void +forward_statclock(int pscnt) +{ + int map; + int id; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_statclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + forwarded_statclock(id, pscnt, &map); + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_statclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +void +forward_hardclock(int pscnt) +{ + int map; + int id; + struct proc *p; + struct pstats *pstats; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. 
This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_hardclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update virtual + * timer and profiling timer. If stathz == 0, also update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + p = checkstate_curproc[id]; + if (p) { + pstats = p->p_stats; + if (checkstate_cpustate[id] == CHECKSTATE_USER && + timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + psignal(p, SIGVTALRM); + map |= (1 << id); + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + psignal(p, SIGPROF); + map |= (1 << id); + } + } + if (stathz == 0) { + forwarded_statclock( id, pscnt, &map); + } + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_hardclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +#endif /* BETTER_CLOCK */ diff --git a/sys/amd64/amd64/mptable.c b/sys/amd64/amd64/mptable.c index f1e720f22d73..632d5dddfb91 100644 --- a/sys/amd64/amd64/mptable.c +++ b/sys/amd64/amd64/mptable.c @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + *astmap |= (1 << id); + } +} + +static void +forwarded_statclock(int id, int pscnt, int *astmap) +{ + struct pstats *pstats; + long rss; + struct rusage *ru; + struct vmspace *vm; + int cpustate; + struct proc *p; +#ifdef GPROF + register struct gmonparam *g; + int i; +#endif + + p = checkstate_curproc[id]; + cpustate = checkstate_cpustate[id]; + + switch (cpustate) { + case CHECKSTATE_USER: + if (p->p_flag & P_PROFIL) + addupc_intr_forwarded(p, id, astmap); + if (pscnt > 1) + return; + p->p_uticks++; + if (p->p_nice > NZERO) + cp_time[CP_NICE]++; + else + cp_time[CP_USER]++; + break; + case CHECKSTATE_SYS: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + + if (!p) + cp_time[CP_IDLE]++; + else { + p->p_sticks++; + cp_time[CP_SYS]++; + } + break; + case CHECKSTATE_INTR: + default: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + if (p) + p->p_iticks++; + cp_time[CP_INTR]++; + } + if (p != NULL) { + p->p_cpticks++; + if (++p->p_estcpu == 0) + p->p_estcpu--; + if ((p->p_estcpu & 3) == 0) { + resetpriority(p); + if (p->p_priority >= PUSER) + p->p_priority = p->p_usrpri; + } + + /* Update resource usage integrals and maximums. 
*/ + if ((pstats = p->p_stats) != NULL && + (ru = &pstats->p_ru) != NULL && + (vm = p->p_vmspace) != NULL) { + ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; + ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; + ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; + rss = vm->vm_pmap.pm_stats.resident_count * + PAGE_SIZE / 1024; + if (ru->ru_maxrss < rss) + ru->ru_maxrss = rss; + } + } +} + +void +forward_statclock(int pscnt) +{ + int map; + int id; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_statclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + forwarded_statclock(id, pscnt, &map); + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_statclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +void +forward_hardclock(int pscnt) +{ + int map; + int id; + struct proc *p; + struct pstats *pstats; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_hardclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update virtual + * timer and profiling timer. If stathz == 0, also update ticks and + * profiling info. 
+ */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + p = checkstate_curproc[id]; + if (p) { + pstats = p->p_stats; + if (checkstate_cpustate[id] == CHECKSTATE_USER && + timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + psignal(p, SIGVTALRM); + map |= (1 << id); + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + psignal(p, SIGPROF); + map |= (1 << id); + } + } + if (stathz == 0) { + forwarded_statclock( id, pscnt, &map); + } + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_hardclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +#endif /* BETTER_CLOCK */ diff --git a/sys/amd64/include/mptable.h b/sys/amd64/include/mptable.h index f1e720f22d73..632d5dddfb91 100644 --- a/sys/amd64/include/mptable.h +++ b/sys/amd64/include/mptable.h @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + *astmap |= (1 << id); + } +} + +static void +forwarded_statclock(int id, int pscnt, int *astmap) +{ + struct pstats *pstats; + long rss; + struct rusage *ru; + struct vmspace *vm; + int cpustate; + struct proc *p; +#ifdef GPROF + register struct gmonparam *g; + int i; +#endif + + p = checkstate_curproc[id]; + cpustate = checkstate_cpustate[id]; + + switch (cpustate) 
{ + case CHECKSTATE_USER: + if (p->p_flag & P_PROFIL) + addupc_intr_forwarded(p, id, astmap); + if (pscnt > 1) + return; + p->p_uticks++; + if (p->p_nice > NZERO) + cp_time[CP_NICE]++; + else + cp_time[CP_USER]++; + break; + case CHECKSTATE_SYS: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + + if (!p) + cp_time[CP_IDLE]++; + else { + p->p_sticks++; + cp_time[CP_SYS]++; + } + break; + case CHECKSTATE_INTR: + default: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + if (p) + p->p_iticks++; + cp_time[CP_INTR]++; + } + if (p != NULL) { + p->p_cpticks++; + if (++p->p_estcpu == 0) + p->p_estcpu--; + if ((p->p_estcpu & 3) == 0) { + resetpriority(p); + if (p->p_priority >= PUSER) + p->p_priority = p->p_usrpri; + } + + /* Update resource usage integrals and maximums. */ + if ((pstats = p->p_stats) != NULL && + (ru = &pstats->p_ru) != NULL && + (vm = p->p_vmspace) != NULL) { + ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; + ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; + ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; + rss = vm->vm_pmap.pm_stats.resident_count * + PAGE_SIZE / 1024; + if (ru->ru_maxrss < rss) + ru->ru_maxrss = rss; + } + } +} + +void +forward_statclock(int pscnt) +{ + int map; + int id; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_statclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + forwarded_statclock(id, pscnt, &map); + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_statclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +void +forward_hardclock(int pscnt) +{ + int map; + int id; + struct proc *p; + struct pstats *pstats; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. 
This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_hardclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update virtual + * timer and profiling timer. If stathz == 0, also update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + p = checkstate_curproc[id]; + if (p) { + pstats = p->p_stats; + if (checkstate_cpustate[id] == CHECKSTATE_USER && + timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + psignal(p, SIGVTALRM); + map |= (1 << id); + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + psignal(p, SIGPROF); + map |= (1 << id); + } + } + if (stathz == 0) { + forwarded_statclock( id, pscnt, &map); + } + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_hardclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +#endif /* BETTER_CLOCK */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index 82a6c34095ab..68fdb912240c 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: smp.h,v 1.27 1997/09/05 18:08:57 smp Exp smp $ + * $Id: smp.h,v 1.32 1997/09/07 22:01:53 fsmp Exp $ * */ @@ -79,6 +79,13 @@ extern u_int ivectors[]; extern volatile u_int stopped_cpus; extern volatile u_int started_cpus; +#ifdef BETTER_CLOCK +void forward_statclock __P((int pscnt)); +void forward_hardclock __P((int pscnt)); +extern unsigned int checkstate_probed_cpus; +extern unsigned int checkstate_need_ast; +#endif + /* global data in apic_ipl.s */ extern u_int vec[]; extern u_int Xintr8254; diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index f1e720f22d73..632d5dddfb91 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + *astmap |= (1 << id); + } +} + +static void +forwarded_statclock(int id, int pscnt, int *astmap) +{ + struct pstats *pstats; + long rss; + struct rusage *ru; + struct vmspace *vm; + int cpustate; + struct proc *p; +#ifdef GPROF + register struct gmonparam *g; + int i; +#endif + + p = checkstate_curproc[id]; + cpustate = checkstate_cpustate[id]; + + switch (cpustate) { + case CHECKSTATE_USER: + if (p->p_flag & P_PROFIL) + addupc_intr_forwarded(p, id, astmap); + if (pscnt > 1) + return; + p->p_uticks++; + if (p->p_nice > NZERO) + cp_time[CP_NICE]++; + else + cp_time[CP_USER]++; + break; + case CHECKSTATE_SYS: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + + if (!p) + cp_time[CP_IDLE]++; + else { + p->p_sticks++; + cp_time[CP_SYS]++; + } + break; + case CHECKSTATE_INTR: + default: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + if (p) + p->p_iticks++; + cp_time[CP_INTR]++; + } + if (p != NULL) { + p->p_cpticks++; + if (++p->p_estcpu == 0) + p->p_estcpu--; + if ((p->p_estcpu & 3) == 0) { + resetpriority(p); + if (p->p_priority >= PUSER) + p->p_priority = p->p_usrpri; + } + + /* Update resource usage integrals and maximums. 
*/ + if ((pstats = p->p_stats) != NULL && + (ru = &pstats->p_ru) != NULL && + (vm = p->p_vmspace) != NULL) { + ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; + ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; + ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; + rss = vm->vm_pmap.pm_stats.resident_count * + PAGE_SIZE / 1024; + if (ru->ru_maxrss < rss) + ru->ru_maxrss = rss; + } + } +} + +void +forward_statclock(int pscnt) +{ + int map; + int id; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_statclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + forwarded_statclock(id, pscnt, &map); + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_statclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +void +forward_hardclock(int pscnt) +{ + int map; + int id; + struct proc *p; + struct pstats *pstats; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_hardclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update virtual + * timer and profiling timer. If stathz == 0, also update ticks and + * profiling info. 
+ */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + p = checkstate_curproc[id]; + if (p) { + pstats = p->p_stats; + if (checkstate_cpustate[id] == CHECKSTATE_USER && + timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + psignal(p, SIGVTALRM); + map |= (1 << id); + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + psignal(p, SIGPROF); + map |= (1 << id); + } + } + if (stathz == 0) { + forwarded_statclock( id, pscnt, &map); + } + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_hardclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +#endif /* BETTER_CLOCK */ diff --git a/sys/i386/i386/mptable.c b/sys/i386/i386/mptable.c index f1e720f22d73..632d5dddfb91 100644 --- a/sys/i386/i386/mptable.c +++ b/sys/i386/i386/mptable.c @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + *astmap |= (1 << id); + } +} + +static void +forwarded_statclock(int id, int pscnt, int *astmap) +{ + struct pstats *pstats; + long rss; + struct rusage *ru; + struct vmspace *vm; + int cpustate; + struct proc *p; +#ifdef GPROF + register struct gmonparam *g; + int i; +#endif + + p = checkstate_curproc[id]; + cpustate = checkstate_cpustate[id]; + + switch (cpustate) { + case 
CHECKSTATE_USER: + if (p->p_flag & P_PROFIL) + addupc_intr_forwarded(p, id, astmap); + if (pscnt > 1) + return; + p->p_uticks++; + if (p->p_nice > NZERO) + cp_time[CP_NICE]++; + else + cp_time[CP_USER]++; + break; + case CHECKSTATE_SYS: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + + if (!p) + cp_time[CP_IDLE]++; + else { + p->p_sticks++; + cp_time[CP_SYS]++; + } + break; + case CHECKSTATE_INTR: + default: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + if (p) + p->p_iticks++; + cp_time[CP_INTR]++; + } + if (p != NULL) { + p->p_cpticks++; + if (++p->p_estcpu == 0) + p->p_estcpu--; + if ((p->p_estcpu & 3) == 0) { + resetpriority(p); + if (p->p_priority >= PUSER) + p->p_priority = p->p_usrpri; + } + + /* Update resource usage integrals and maximums. */ + if ((pstats = p->p_stats) != NULL && + (ru = &pstats->p_ru) != NULL && + (vm = p->p_vmspace) != NULL) { + ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; + ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; + ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; + rss = vm->vm_pmap.pm_stats.resident_count * + PAGE_SIZE / 1024; + if (ru->ru_maxrss < rss) + ru->ru_maxrss = rss; + } + } +} + +void +forward_statclock(int pscnt) +{ + int map; + int id; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_statclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + forwarded_statclock(id, pscnt, &map); + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_statclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +void +forward_hardclock(int pscnt) +{ + int map; + int id; + struct proc *p; + struct pstats *pstats; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. 
This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_hardclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update virtual + * timer and profiling timer. If stathz == 0, also update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + p = checkstate_curproc[id]; + if (p) { + pstats = p->p_stats; + if (checkstate_cpustate[id] == CHECKSTATE_USER && + timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + psignal(p, SIGVTALRM); + map |= (1 << id); + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + psignal(p, SIGPROF); + map |= (1 << id); + } + } + if (stathz == 0) { + forwarded_statclock( id, pscnt, &map); + } + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_hardclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +#endif /* BETTER_CLOCK */ diff --git a/sys/i386/include/asnames.h b/sys/i386/include/asnames.h index 3330915fbff2..f1a1459d422b 100644 --- a/sys/i386/include/asnames.h +++ b/sys/i386/include/asnames.h @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: asnames.h,v 1.9 1997/09/21 15:03:59 peter Exp $ + * $Id: asnames.h,v 1.10 1997/09/29 02:17:42 peter Exp $ */ #ifndef _MACHINE_ASNAMES_H_ @@ -129,6 +129,7 @@ #define _Xintr9 Xintr9 #define _XintrRTC XintrRTC #define _Xinvltlb Xinvltlb +#define _Xcpucheckstate Xcpucheckstate #define _Xmchk Xmchk #define _Xmissing Xmissing #define _Xnmi Xnmi @@ -327,6 +328,12 @@ #define _ss_unlock ss_unlock #define _started_cpus started_cpus #define _stopped_cpus stopped_cpus +#define _checkstate_cpus checkstate_cpus +#define _checkstate_cpustate checkstate_cpustate +#define _checkstate_curproc checkstate_curproc +#define _checkstate_pc checkstate_pc +#define _checkstate_need_ast checkstate_need_ast +#define _checkstate_pending_ast checkstate_pending_ast #define _syscall syscall #define _szsigcode szsigcode #define _time time diff --git a/sys/i386/include/mptable.h b/sys/i386/include/mptable.h index f1e720f22d73..632d5dddfb91 100644 --- a/sys/i386/include/mptable.h +++ b/sys/i386/include/mptable.h @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + *astmap |= (1 << id); + } +} + +static void +forwarded_statclock(int id, int pscnt, int *astmap) +{ + struct pstats *pstats; + long rss; + struct rusage *ru; + struct vmspace *vm; + int cpustate; + struct proc *p; +#ifdef GPROF + register struct gmonparam *g; + int i; +#endif + + p = checkstate_curproc[id]; + cpustate = checkstate_cpustate[id]; + + switch (cpustate) { + case CHECKSTATE_USER: + if (p->p_flag & P_PROFIL) + addupc_intr_forwarded(p, id, astmap); + if (pscnt > 1) + return; + p->p_uticks++; + if (p->p_nice > NZERO) + cp_time[CP_NICE]++; + else + cp_time[CP_USER]++; + break; + case CHECKSTATE_SYS: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + + if (!p) + cp_time[CP_IDLE]++; + else { + p->p_sticks++; + cp_time[CP_SYS]++; + } + break; + case CHECKSTATE_INTR: + default: +#ifdef GPROF + /* + * Kernel statistics are just like addupc_intr, only easier. + */ + g = &_gmonparam; + if (g->state == GMON_PROF_ON) { + i = checkstate_pc[id] - g->lowpc; + if (i < g->textsize) { + i /= HISTFRACTION * sizeof(*g->kcount); + g->kcount[i]++; + } + } +#endif + if (pscnt > 1) + return; + if (p) + p->p_iticks++; + cp_time[CP_INTR]++; + } + if (p != NULL) { + p->p_cpticks++; + if (++p->p_estcpu == 0) + p->p_estcpu--; + if ((p->p_estcpu & 3) == 0) { + resetpriority(p); + if (p->p_priority >= PUSER) + p->p_priority = p->p_usrpri; + } + + /* Update resource usage integrals and maximums. 
*/ + if ((pstats = p->p_stats) != NULL && + (ru = &pstats->p_ru) != NULL && + (vm = p->p_vmspace) != NULL) { + ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024; + ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024; + ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024; + rss = vm->vm_pmap.pm_stats.resident_count * + PAGE_SIZE / 1024; + if (ru->ru_maxrss < rss) + ru->ru_maxrss = rss; + } + } +} + +void +forward_statclock(int pscnt) +{ + int map; + int id; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle ) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_statclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update ticks and + * profiling info. + */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + forwarded_statclock(id, pscnt, &map); + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_statclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +void +forward_hardclock(int pscnt) +{ + int map; + int id; + struct proc *p; + struct pstats *pstats; + int i; + + /* Kludge. We don't yet have separate locks for the interrupts + * and the kernel. This means that we cannot let the other processors + * handle complex interrupts while inhibiting them from entering + * the kernel in a non-interrupt context. + * + * What we can do, without changing the locking mechanisms yet, + * is letting the other processors handle a very simple interrupt + * (wich determines the processor states), and do the main + * work ourself. + */ + + if (!smp_started || !invltlb_ok) + return; + + /* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */ + + map = other_cpus; + checkstate_probed_cpus = 0; + selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED); + + i = 0; + while (checkstate_probed_cpus != map) { + /* spin */ + i++; + if (i == 1000000) { + printf("forward_hardclock: checkstate %x\n", + checkstate_probed_cpus); + } + } + + /* + * Step 2: walk through other processors processes, update virtual + * timer and profiling timer. If stathz == 0, also update ticks and + * profiling info. 
+ */ + + map = 0; + for (id = 0; id < mp_ncpus; id++) { + if (id == cpuid) + continue; + if (((1 << id) & checkstate_probed_cpus) == 0) + panic("state for cpu %d not available", cpuid); + p = checkstate_curproc[id]; + if (p) { + pstats = p->p_stats; + if (checkstate_cpustate[id] == CHECKSTATE_USER && + timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) { + psignal(p, SIGVTALRM); + map |= (1 << id); + } + if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) && + itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) { + psignal(p, SIGPROF); + map |= (1 << id); + } + } + if (stathz == 0) { + forwarded_statclock( id, pscnt, &map); + } + } + if (map != 0) { + checkstate_need_ast |= map; + selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED); + i = 0; + while (checkstate_need_ast != 0) { + /* spin */ + i++; + if (i > 1000000) { + printf("forward_hardclock: dropped ast 0x%x\n", + checkstate_need_ast); + break; + } + } + } +} + +#endif /* BETTER_CLOCK */ diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 82a6c34095ab..68fdb912240c 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -6,7 +6,7 @@ * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * $Id: smp.h,v 1.27 1997/09/05 18:08:57 smp Exp smp $ + * $Id: smp.h,v 1.32 1997/09/07 22:01:53 fsmp Exp $ * */ @@ -79,6 +79,13 @@ extern u_int ivectors[]; extern volatile u_int stopped_cpus; extern volatile u_int started_cpus; +#ifdef BETTER_CLOCK +void forward_statclock __P((int pscnt)); +void forward_hardclock __P((int pscnt)); +extern unsigned int checkstate_probed_cpus; +extern unsigned int checkstate_need_ast; +#endif + /* global data in apic_ipl.s */ extern u_int vec[]; extern u_int Xintr8254; diff --git a/sys/i386/include/smptests.h b/sys/i386/include/smptests.h index 2ad4da810d7b..da80b72c2cdb 100644 --- a/sys/i386/include/smptests.h +++ b/sys/i386/include/smptests.h @@ -22,7 +22,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: smptests.h,v 1.26 1997/09/07 22:02:02 fsmp Exp $ + * $Id: smptests.h,v 1.27 1997/09/07 23:06:15 fsmp Exp $ */ #ifndef _MACHINE_SMPTESTS_H_ @@ -34,6 +34,27 @@ */ +/* + * Tor's clock improvements. + * + * When the giant kernel lock disappears, a different strategy should + * probably be used, thus this patch can only be considered a temporary + * measure. + * + * This patch causes (NCPU-1)*(128+100) extra IPIs per second. + * During profiling, the number is (NCPU-1)*(1024+100) extra IPIs/s + * in addition to extra IPIs due to forwarding ASTs to other CPUs. + * + * Having a shared AST flag in an SMP configuration is wrong, and I've + * just kludged around it, based upon the kernel lock blocking other + * processors from entering the kernel while handling an AST for one + * processor. When the giant kernel lock disappers, this kludge breaks. + * + * -- Tor + */ +#define BETTER_CLOCK + + /* * Control the "giant lock" pushdown by logical steps. */ diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 5f138615a64f..220e4b534858 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 - * $Id: kern_clock.c,v 1.44 1997/11/18 12:24:22 bde Exp $ + * $Id: kern_clock.c,v 1.45 1997/11/24 15:15:27 bde Exp $ */ /* Portions of this software are covered by the following: */ @@ -80,6 +80,10 @@ #include #endif +#if defined(SMP) && defined(BETTER_CLOCK) +#include +#endif + static void initclocks __P((void *dummy)); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) @@ -91,7 +95,11 @@ struct callout_tailq *callwheel; /* Some of these don't belong here, but it's easiest to concentrate them. */ +#if defined(SMP) && defined(BETTER_CLOCK) +long cp_time[CPUSTATES]; +#else static long cp_time[CPUSTATES]; +#endif long dk_seek[DK_NDRIVE]; static long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */ long dk_wds[DK_NDRIVE]; @@ -471,6 +479,9 @@ hardclock(frame) psignal(p, SIGPROF); } +#if defined(SMP) && defined(BETTER_CLOCK) + forward_hardclock(pscnt); +#endif /* * If no separate statistics clock is available, run it from here. */ @@ -971,6 +982,10 @@ statclock(frame) p = curproc; if (p->p_flag & P_PROFIL) addupc_intr(p, CLKF_PC(frame), 1); +#if defined(SMP) && defined(BETTER_CLOCK) + if (stathz != 0) + forward_statclock(pscnt); +#endif if (--pscnt > 0) return; /* @@ -995,6 +1010,10 @@ statclock(frame) g->kcount[i]++; } } +#endif +#if defined(SMP) && defined(BETTER_CLOCK) + if (stathz != 0) + forward_statclock(pscnt); #endif if (--pscnt > 0) return; diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 5f138615a64f..220e4b534858 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 - * $Id: kern_clock.c,v 1.44 1997/11/18 12:24:22 bde Exp $ + * $Id: kern_clock.c,v 1.45 1997/11/24 15:15:27 bde Exp $ */ /* Portions of this software are covered by the following: */ @@ -80,6 +80,10 @@ #include #endif +#if defined(SMP) && defined(BETTER_CLOCK) +#include +#endif + static void initclocks __P((void *dummy)); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) @@ -91,7 +95,11 @@ struct callout_tailq *callwheel; /* Some of these don't belong here, but it's easiest to concentrate them. */ +#if defined(SMP) && defined(BETTER_CLOCK) +long cp_time[CPUSTATES]; +#else static long cp_time[CPUSTATES]; +#endif long dk_seek[DK_NDRIVE]; static long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */ long dk_wds[DK_NDRIVE]; @@ -471,6 +479,9 @@ hardclock(frame) psignal(p, SIGPROF); } +#if defined(SMP) && defined(BETTER_CLOCK) + forward_hardclock(pscnt); +#endif /* * If no separate statistics clock is available, run it from here. */ @@ -971,6 +982,10 @@ statclock(frame) p = curproc; if (p->p_flag & P_PROFIL) addupc_intr(p, CLKF_PC(frame), 1); +#if defined(SMP) && defined(BETTER_CLOCK) + if (stathz != 0) + forward_statclock(pscnt); +#endif if (--pscnt > 0) return; /* @@ -995,6 +1010,10 @@ statclock(frame) g->kcount[i]++; } } +#endif +#if defined(SMP) && defined(BETTER_CLOCK) + if (stathz != 0) + forward_statclock(pscnt); #endif if (--pscnt > 0) return; diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 5f138615a64f..220e4b534858 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. 
* * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 - * $Id: kern_clock.c,v 1.44 1997/11/18 12:24:22 bde Exp $ + * $Id: kern_clock.c,v 1.45 1997/11/24 15:15:27 bde Exp $ */ /* Portions of this software are covered by the following: */ @@ -80,6 +80,10 @@ #include #endif +#if defined(SMP) && defined(BETTER_CLOCK) +#include +#endif + static void initclocks __P((void *dummy)); SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) @@ -91,7 +95,11 @@ struct callout_tailq *callwheel; /* Some of these don't belong here, but it's easiest to concentrate them. */ +#if defined(SMP) && defined(BETTER_CLOCK) +long cp_time[CPUSTATES]; +#else static long cp_time[CPUSTATES]; +#endif long dk_seek[DK_NDRIVE]; static long dk_time[DK_NDRIVE]; /* time busy (in statclock ticks) */ long dk_wds[DK_NDRIVE]; @@ -471,6 +479,9 @@ hardclock(frame) psignal(p, SIGPROF); } +#if defined(SMP) && defined(BETTER_CLOCK) + forward_hardclock(pscnt); +#endif /* * If no separate statistics clock is available, run it from here. */ @@ -971,6 +982,10 @@ statclock(frame) p = curproc; if (p->p_flag & P_PROFIL) addupc_intr(p, CLKF_PC(frame), 1); +#if defined(SMP) && defined(BETTER_CLOCK) + if (stathz != 0) + forward_statclock(pscnt); +#endif if (--pscnt > 0) return; /* @@ -995,6 +1010,10 @@ statclock(frame) g->kcount[i]++; } } +#endif +#if defined(SMP) && defined(BETTER_CLOCK) + if (stathz != 0) + forward_statclock(pscnt); #endif if (--pscnt > 0) return; diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index f1e720f22d73..632d5dddfb91 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -22,23 +22,37 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: mp_machdep.c,v 1.59 1997/10/28 15:58:10 bde Exp $ + * $Id: mp_machdep.c,v 1.40 1997/12/04 19:30:03 smp Exp smp $ */ #include "opt_smp.h" #include "opt_vm86.h" +#ifdef SMP +#include +#else +#error +#endif + #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#endif #include #include #include #include #include +#ifdef BETTER_CLOCK +#include +#include +#include +#endif #include #include @@ -548,6 +562,16 @@ mp_enable(u_int boot_addr) setidt(XINVLTLB_OFFSET, Xinvltlb, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#ifdef BETTER_CLOCK + /* install an inter-CPU IPI for reading processor state */ + setidt(XCPUCHECKSTATE_OFFSET, Xcpucheckstate, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); + + /* install an inter-CPU IPI for forcing an additional software trap */ + setidt(XCPUAST_OFFSET, Xcpuast, + SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); +#endif + /* install an inter-CPU IPI for CPU stop/restart */ setidt(XCPUSTOP_OFFSET, Xcpustop, SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL)); @@ -2051,3 +2075,293 @@ putfmtrr() wrmsr(0x259, 0x0101010101010101LL); } } + + +#ifdef BETTER_CLOCK + +#define CHECKSTATE_USER 0 +#define CHECKSTATE_SYS 1 +#define CHECKSTATE_INTR 2 + +struct proc* checkstate_curproc[NCPU]; +int checkstate_cpustate[NCPU]; +u_long checkstate_pc[NCPU]; + +extern long cp_time[CPUSTATES]; + +#define PC_TO_INDEX(pc, prof) \ + ((int)(((u_quad_t)((pc) - (prof)->pr_off) * \ + (u_quad_t)((prof)->pr_scale)) >> 16) & ~1) + +static void +addupc_intr_forwarded(struct proc *p, int id, int *astmap) +{ + int i; + struct uprof *prof; + u_long pc; + + pc = checkstate_pc[id]; + prof = &p->p_stats->p_prof; + if (pc >= prof->pr_off && + (i = PC_TO_INDEX(pc, prof)) < prof->pr_size) { + if ((p->p_flag & P_OWEUPC) == 0) { + prof->pr_addr = pc; + prof->pr_ticks = 1; + p->p_flag |= P_OWEUPC; + } + 
*astmap |= (1 << id);
+	}
+}
+
+static void
+forwarded_statclock(int id, int pscnt, int *astmap)
+{
+	struct pstats *pstats;
+	long rss;
+	struct rusage *ru;
+	struct vmspace *vm;
+	int cpustate;
+	struct proc *p;
+#ifdef GPROF
+	register struct gmonparam *g;
+	int i;
+#endif
+
+	p = checkstate_curproc[id];
+	cpustate = checkstate_cpustate[id];
+
+	switch (cpustate) {
+	case CHECKSTATE_USER:
+		if (p->p_flag & P_PROFIL)
+			addupc_intr_forwarded(p, id, astmap);
+		if (pscnt > 1)
+			return;
+		p->p_uticks++;
+		if (p->p_nice > NZERO)
+			cp_time[CP_NICE]++;
+		else
+			cp_time[CP_USER]++;
+		break;
+	case CHECKSTATE_SYS:
+#ifdef GPROF
+		/*
+		 * Kernel statistics are just like addupc_intr, only easier.
+		 */
+		g = &_gmonparam;
+		if (g->state == GMON_PROF_ON) {
+			i = checkstate_pc[id] - g->lowpc;
+			if (i < g->textsize) {
+				i /= HISTFRACTION * sizeof(*g->kcount);
+				g->kcount[i]++;
+			}
+		}
+#endif
+		if (pscnt > 1)
+			return;
+
+		if (!p)
+			cp_time[CP_IDLE]++;
+		else {
+			p->p_sticks++;
+			cp_time[CP_SYS]++;
+		}
+		break;
+	case CHECKSTATE_INTR:
+	default:
+#ifdef GPROF
+		/*
+		 * Kernel statistics are just like addupc_intr, only easier.
+		 */
+		g = &_gmonparam;
+		if (g->state == GMON_PROF_ON) {
+			i = checkstate_pc[id] - g->lowpc;
+			if (i < g->textsize) {
+				i /= HISTFRACTION * sizeof(*g->kcount);
+				g->kcount[i]++;
+			}
+		}
+#endif
+		if (pscnt > 1)
+			return;
+		if (p)
+			p->p_iticks++;
+		cp_time[CP_INTR]++;
+	}
+	if (p != NULL) {
+		p->p_cpticks++;
+		if (++p->p_estcpu == 0)
+			p->p_estcpu--;
+		if ((p->p_estcpu & 3) == 0) {
+			resetpriority(p);
+			if (p->p_priority >= PUSER)
+				p->p_priority = p->p_usrpri;
+		}
+
+		/* Update resource usage integrals and maximums. */
+		if ((pstats = p->p_stats) != NULL &&
+		    (ru = &pstats->p_ru) != NULL &&
+		    (vm = p->p_vmspace) != NULL) {
+			ru->ru_ixrss += vm->vm_tsize * PAGE_SIZE / 1024;
+			ru->ru_idrss += vm->vm_dsize * PAGE_SIZE / 1024;
+			ru->ru_isrss += vm->vm_ssize * PAGE_SIZE / 1024;
+			rss = vm->vm_pmap.pm_stats.resident_count *
+				PAGE_SIZE / 1024;
+			if (ru->ru_maxrss < rss)
+				ru->ru_maxrss = rss;
+		}
+	}
+}
+
+void
+forward_statclock(int pscnt)
+{
+	int map;
+	int id;
+	int i;
+
+	/* Kludge. We don't yet have separate locks for the interrupts
+	 * and the kernel. This means that we cannot let the other processors
+	 * handle complex interrupts while inhibiting them from entering
+	 * the kernel in a non-interrupt context.
+	 *
+	 * What we can do, without changing the locking mechanisms yet,
+	 * is letting the other processors handle a very simple interrupt
+	 * (which determines the processor states), and do the main
+	 * work ourselves.
+	 */
+
+	if (!smp_started || !invltlb_ok)
+		return;
+
+	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
+
+	map = other_cpus;
+	checkstate_probed_cpus = 0;
+	selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
+
+	i = 0;
+	while (checkstate_probed_cpus != map) {
+		/* spin */
+		i++;
+		if (i == 1000000) {
+			printf("forward_statclock: checkstate %x\n",
+			       checkstate_probed_cpus);
+		}
+	}
+
+	/*
+	 * Step 2: walk through other processors' processes, update ticks and
+	 * profiling info.
+	 */
+
+	map = 0;
+	for (id = 0; id < mp_ncpus; id++) {
+		if (id == cpuid)
+			continue;
+		if (((1 << id) & checkstate_probed_cpus) == 0)
+			panic("state for cpu %d not available", cpuid);
+		forwarded_statclock(id, pscnt, &map);
+	}
+	if (map != 0) {
+		checkstate_need_ast |= map;
+		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
+		i = 0;
+		while (checkstate_need_ast != 0) {
+			/* spin */
+			i++;
+			if (i > 1000000) {
+				printf("forward_statclock: dropped ast 0x%x\n",
+				       checkstate_need_ast);
+				break;
+			}
+		}
+	}
+}
+
+void
+forward_hardclock(int pscnt)
+{
+	int map;
+	int id;
+	struct proc *p;
+	struct pstats *pstats;
+	int i;
+
+	/* Kludge. We don't yet have separate locks for the interrupts
+	 * and the kernel. This means that we cannot let the other processors
+	 * handle complex interrupts while inhibiting them from entering
+	 * the kernel in a non-interrupt context.
+	 *
+	 * What we can do, without changing the locking mechanisms yet,
+	 * is letting the other processors handle a very simple interrupt
+	 * (which determines the processor states), and do the main
+	 * work ourselves.
+	 */
+
+	if (!smp_started || !invltlb_ok)
+		return;
+
+	/* Step 1: Probe state (user, cpu, interrupt, spinlock, idle) */
+
+	map = other_cpus;
+	checkstate_probed_cpus = 0;
+	selected_apic_ipi(map, XCPUCHECKSTATE_OFFSET, APIC_DELMODE_FIXED);
+
+	i = 0;
+	while (checkstate_probed_cpus != map) {
+		/* spin */
+		i++;
+		if (i == 1000000) {
+			printf("forward_hardclock: checkstate %x\n",
+			       checkstate_probed_cpus);
+		}
+	}
+
+	/*
+	 * Step 2: walk through other processors' processes, update virtual
+	 * timer and profiling timer. If stathz == 0, also update ticks and
+	 * profiling info.
+	 */
+
+	map = 0;
+	for (id = 0; id < mp_ncpus; id++) {
+		if (id == cpuid)
+			continue;
+		if (((1 << id) & checkstate_probed_cpus) == 0)
+			panic("state for cpu %d not available", cpuid);
+		p = checkstate_curproc[id];
+		if (p) {
+			pstats = p->p_stats;
+			if (checkstate_cpustate[id] == CHECKSTATE_USER &&
+			    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+			    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
+				psignal(p, SIGVTALRM);
+				map |= (1 << id);
+			}
+			if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+			    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
+				psignal(p, SIGPROF);
+				map |= (1 << id);
+			}
+		}
+		if (stathz == 0) {
+			forwarded_statclock(id, pscnt, &map);
+		}
+	}
+	if (map != 0) {
+		checkstate_need_ast |= map;
+		selected_apic_ipi(map, XCPUAST_OFFSET, APIC_DELMODE_FIXED);
+		i = 0;
+		while (checkstate_need_ast != 0) {
+			/* spin */
+			i++;
+			if (i > 1000000) {
+				printf("forward_hardclock: dropped ast 0x%x\n",
+				       checkstate_need_ast);
+				break;
+			}
+		}
+	}
+}
+
+#endif /* BETTER_CLOCK */
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index 82a6c34095ab..68fdb912240c 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -6,7 +6,7 @@
  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
  * ----------------------------------------------------------------------------
  *
- * $Id: smp.h,v 1.27 1997/09/05 18:08:57 smp Exp smp $
+ * $Id: smp.h,v 1.32 1997/09/07 22:01:53 fsmp Exp $
  *
  */
 
@@ -79,6 +79,13 @@ extern u_int	ivectors[];
 extern volatile u_int		stopped_cpus;
 extern volatile u_int		started_cpus;
 
+#ifdef BETTER_CLOCK
+void	forward_statclock	__P((int pscnt));
+void	forward_hardclock	__P((int pscnt));
+extern unsigned int	checkstate_probed_cpus;
+extern unsigned int	checkstate_need_ast;
+#endif
+
 /* global data in apic_ipl.s */
 extern u_int	vec[];
 extern u_int	Xintr8254;
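
The forward_statclock()/forward_hardclock() pair added above relies on a simple bitmap handshake: the sending CPU raises the XCPUCHECKSTATE IPI at the other processors, each target records its current state and sets its bit in checkstate_probed_cpus, and the sender spins until that bitmap equals the set of CPUs it signalled; checkstate_need_ast is drained the same way after the XCPUAST IPI. The user-space sketch below models only that handshake, under simplifying assumptions: the IPI is replaced by a direct function call, the names (NCPU_SIM, fake_checkstate_ipi, probe_other_cpus) are made up for illustration, and unlike the kernel loop it bails out instead of merely printing a diagnostic and continuing to spin.

#include <stdio.h>

#define NCPU_SIM	4
#define SPIN_LIMIT	1000000

/* Models checkstate_probed_cpus: one bit per CPU that has reported in. */
static volatile unsigned int probed_cpus;

/*
 * Stands in for the Xcpucheckstate IPI handler: in the kernel this would
 * also record curproc, cpustate and pc before setting the bit.
 */
static void
fake_checkstate_ipi(int cpu)
{
	probed_cpus |= 1u << cpu;
}

/*
 * Models step 1 of forward_statclock(): signal every other CPU, then
 * busy-wait until all of them have answered, with a bailout so a wedged
 * CPU cannot hang the caller silently.
 */
static int
probe_other_cpus(int self)
{
	unsigned int map = 0;
	int cpu, spins;

	for (cpu = 0; cpu < NCPU_SIM; cpu++)
		if (cpu != self)
			map |= 1u << cpu;

	probed_cpus = 0;
	for (cpu = 0; cpu < NCPU_SIM; cpu++)
		if (map & (1u << cpu))
			fake_checkstate_ipi(cpu);	/* stands in for selected_apic_ipi() */

	spins = 0;
	while (probed_cpus != map) {
		if (++spins > SPIN_LIMIT) {
			printf("probe_other_cpus: only 0x%x answered\n", probed_cpus);
			return (-1);
		}
	}
	return (0);
}

int
main(void)
{
	if (probe_other_cpus(0) == 0)
		printf("all CPUs reported state: 0x%x\n", probed_cpus);
	return (0);
}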
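
PC_TO_INDEX() is the usual profil(2)-style 16.16 fixed-point scaling: the distance of the sampled pc from pr_off is multiplied by pr_scale, shifted right by 16, and forced even so it can serve as a byte offset into an array of 16-bit profile counters. The snippet below redoes that arithmetic with made-up numbers purely to show the mapping; struct fake_uprof and the values in main() are hypothetical stand-ins, not the kernel's struct uprof.

#include <stdio.h>

/* Stand-in for the fields of struct uprof that PC_TO_INDEX() reads. */
struct fake_uprof {
	unsigned long pr_off;	/* start of the profiled text range */
	unsigned long pr_scale;	/* 16.16 fixed-point scale factor */
	unsigned long pr_size;	/* size of the profile buffer in bytes */
};

/* Same arithmetic as the PC_TO_INDEX() macro above. */
static int
pc_to_index(unsigned long pc, const struct fake_uprof *prof)
{
	return ((int)((((unsigned long long)(pc - prof->pr_off) *
	    prof->pr_scale) >> 16) & ~1));
}

int
main(void)
{
	/*
	 * Hypothetical profile of 64KB of text into a 32KB buffer:
	 * a scale of 0x8000 (0.5 in 16.16) means each 16-bit counter
	 * covers four bytes of text.
	 */
	struct fake_uprof prof = { 0x400000UL, 0x8000UL, 0x8000UL };
	unsigned long pc = 0x400123UL;
	int i = pc_to_index(pc, &prof);

	/* Prints: pc 0x400123 -> byte offset 144 (counter 72). */
	if (pc >= prof.pr_off && (unsigned long)i < prof.pr_size)
		printf("pc %#lx -> byte offset %d (counter %d)\n", pc, i, i / 2);
	return (0);
}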