freebsd-dev/sys/kern/kern_clock.c

590 lines
15 KiB
C
Raw Normal View History

1994-05-24 10:09:53 +00:00
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
*/
2003-06-11 00:56:59 +00:00
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_kdb.h"
#include "opt_device_polling.h"
#include "opt_hwpmc_hooks.h"
#include "opt_ntp.h"
#include "opt_watchdog.h"
1994-05-24 10:09:53 +00:00
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/kdb.h>
1994-05-24 10:09:53 +00:00
#include <sys/kernel.h>
#include <sys/lock.h>
2001-10-11 17:53:43 +00:00
#include <sys/ktr.h>
#include <sys/mutex.h>
1994-05-24 10:09:53 +00:00
#include <sys/proc.h>
#include <sys/resource.h>
1994-05-24 10:09:53 +00:00
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
#include <sys/smp.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/sysctl.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/limits.h>
#include <sys/timetc.h>
1994-05-24 10:09:53 +00:00
#ifdef GPROF
#include <sys/gmon.h>
#endif
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
Device Polling code for -current. Non-SMP, i386-only, no polling in the idle loop at the moment. To use this code you must compile a kernel with options DEVICE_POLLING and at runtime enable polling with sysctl kern.polling.enable=1 The percentage of CPU reserved to userland can be set with sysctl kern.polling.user_frac=NN (default is 50) while the remainder is used by polling device drivers and netisr's. These are the only two variables that you should need to touch. There are a few more parameters in kern.polling but the default values are adequate for all purposes. See the code in kern_poll.c for more details on them. Polling in the idle loop will be implemented shortly by introducing a kernel thread which does the job. Until then, the amount of CPU dedicated to polling will never exceed (100-user_frac). The equivalent (actually, better) code for -stable is at http://info.iet.unipi.it/~luigi/polling/ and also supports polling in the idle loop. NOTE to Alpha developers: There is really nothing in this code that is i386-specific. If you move the 2 lines supporting the new option from sys/conf/{files,options}.i386 to sys/conf/{files,options} I am pretty sure that this should work on the Alpha as well, just that I do not have a suitable test box to try it. If someone feels like trying it, I would appreciate it. NOTE to other developers: sure some things could be done better, and as always I am open to constructive criticism, which a few of you have already given and I greatly appreciated. However, before proposing radical architectural changes, please take some time to possibly try out this code, or at the very least read the comments in kern_poll.c, especially re. the reason why I am using a soft netisr and cannot (I believe) replace it with a simple timeout. Quick description of files touched by this commit: sys/conf/files.i386 new file kern/kern_poll.c sys/conf/options.i386 new option sys/i386/i386/trap.c poll in trap (disabled by default) sys/kern/kern_clock.c initialization and hardclock hooks. sys/kern/kern_intr.c minor swi_net changes sys/kern/kern_poll.c the bulk of the code. sys/net/if.h new flag sys/net/if_var.h declaration for functions used in device drivers. sys/net/netisr.h NETISR_POLL sys/dev/fxp/if_fxp.c sys/dev/fxp/if_fxpvar.h sys/pci/if_dc.c sys/pci/if_dcreg.h sys/pci/if_sis.c sys/pci/if_sisreg.h device driver modifications
2001-12-14 17:56:12 +00:00
#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */
2002-03-19 21:25:46 +00:00
static void initclocks(void *dummy);
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
/* Some of these don't belong here, but it's easiest to concentrate them. */
long cp_time[CPUSTATES];
/* Spin-lock protecting profiling statistics. */
struct mtx time_lock;
static int
sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS)
{
int error;
#ifdef SCTL_MASK32
int i;
unsigned int cp_time32[CPUSTATES];
2006-04-17 20:14:51 +00:00
if (req->flags & SCTL_MASK32) {
if (!req->oldptr)
return SYSCTL_OUT(req, 0, sizeof(cp_time32));
for (i = 0; i < CPUSTATES; i++)
cp_time32[i] = (unsigned int)cp_time[i];
error = SYSCTL_OUT(req, cp_time32, sizeof(cp_time32));
} else
#endif
{
if (!req->oldptr)
return SYSCTL_OUT(req, 0, sizeof(cp_time));
error = SYSCTL_OUT(req, cp_time, sizeof(cp_time));
}
return error;
}
2006-04-17 20:14:51 +00:00
SYSCTL_PROC(_kern, OID_AUTO, cp_time, CTLTYPE_LONG|CTLFLAG_RD,
0,0, sysctl_kern_cp_time, "LU", "CPU time statistics");
#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
static int watchdog_ticks;
static int watchdog_enabled;
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);
#endif /* SW_WATCHDOG */
1994-05-24 10:09:53 +00:00
/*
* Clock handling routines.
*
* This code is written to operate with two timers that run independently of
* each other.
1994-05-24 10:09:53 +00:00
*
* The main timer, running hz times per second, is used to trigger interval
* timers, timeouts and rescheduling as needed.
1994-05-24 10:09:53 +00:00
*
* The second timer handles kernel and user profiling,
* and does resource use estimation. If the second timer is programmable,
* it is randomized to avoid aliasing between the two clocks. For example,
* the randomization prevents an adversary from always giving up the cpu
* just before its quantum expires. Otherwise, it would never accumulate
* cpu ticks. The mean frequency of the second timer is stathz.
*
* If no second timer exists, stathz will be zero; in this case we drive
* profiling and statistics off the main clock. This WILL NOT be accurate;
* do not do it unless absolutely necessary.
*
1994-05-24 10:09:53 +00:00
* The statistics clock may (or may not) be run at a higher rate while
* profiling. This profile clock runs at profhz. We require that profhz
* be an integral multiple of stathz.
*
* If the statistics clock is running fast, it must be divided by the ratio
* profhz/stathz for statistics. (For profiling, every tick counts.)
1994-05-24 10:09:53 +00:00
*
* Time-of-day is maintained using a "timecounter", which may or may
* not be related to the hardware generating the above mentioned
* interrupts.
1994-05-24 10:09:53 +00:00
*/
int stathz;
int profhz;
int profprocs;
1994-05-24 10:09:53 +00:00
int ticks;
int psratio;
1994-05-24 10:09:53 +00:00
/*
* Initialize clock frequencies and start both clocks running.
*/
/* ARGSUSED*/
static void
initclocks(dummy)
void *dummy;
1994-05-24 10:09:53 +00:00
{
register int i;
/*
* Set divisors to 1 (normal case) and let the machine-specific
* code do its bit.
*/
mtx_init(&time_lock, "time lock", NULL, MTX_SPIN);
cpu_initclocks();
1994-05-24 10:09:53 +00:00
/*
* Compute profhz/stathz, and fix profhz if needed.
*/
i = stathz ? stathz : hz;
if (profhz == 0)
profhz = i;
psratio = profhz / i;
#ifdef SW_WATCHDOG
EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
#endif
1994-05-24 10:09:53 +00:00
}
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
/*
* Each time the real-time timer fires, this function is called on all CPUs.
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
* Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
* the other CPUs in the system need to call this function.
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
*/
void
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
hardclock_cpu(int usermode)
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
{
struct pstats *pstats;
struct thread *td = curthread;
struct proc *p = td->td_proc;
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
/*
* Run current process's virtual and profile time, as needed.
*/
mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
Add scheduler CORE, the work I have done half a year ago, recent, I picked it up again. The scheduler is forked from ULE, but the algorithm to detect an interactive process is almost completely different with ULE, it comes from Linux paper "Understanding the Linux 2.6.8.1 CPU Scheduler", although I still use same word "score" as a priority boost in ULE scheduler. Briefly, the scheduler has following characteristic: 1. Timesharing process's nice value is seriously respected, timeslice and interaction detecting algorithm are based on nice value. 2. per-cpu scheduling queue and load balancing. 3. O(1) scheduling. 4. Some cpu affinity code in wakeup path. 5. Support POSIX SCHED_FIFO and SCHED_RR. Unlike scheduler 4BSD and ULE which using fuzzy RQ_PPQ, the scheduler uses 256 priority queues. Unlike ULE which using pull and push, the scheduelr uses pull method, the main reason is to let relative idle cpu do the work, but current the whole scheduler is protected by the big sched_lock, so the benefit is not visible, it really can be worse than nothing because all other cpu are locked out when we are doing balancing work, which the 4BSD scheduelr does not have this problem. The scheduler does not support hyperthreading very well, in fact, the scheduler does not make the difference between physical CPU and logical CPU, this should be improved in feature. The scheduler has priority inversion problem on MP machine, it is not good for realtime scheduling, it can cause realtime process starving. As a result, it seems the MySQL super-smack runs better on my Pentium-D machine when using libthr, despite on UP or SMP kernel.
2006-06-13 13:12:56 +00:00
sched_tick();
#ifdef KSE
#if 0 /* for now do nothing */
if (p->p_flag & P_SA) {
/* XXXKSE What to do? Should do more. */
}
#endif
#endif
pstats = p->p_stats;
if (usermode &&
timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) {
p->p_sflag |= PS_ALRMPEND;
td->td_flags |= TDF_ASTPENDING;
}
if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) {
p->p_sflag |= PS_PROFPEND;
td->td_flags |= TDF_ASTPENDING;
}
mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
#ifdef HWPMC_HOOKS
if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
#endif
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
}
1994-05-24 10:09:53 +00:00
/*
* The real-time timer, interrupting hz times per second.
*/
void
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
hardclock(int usermode, uintfptr_t pc)
1994-05-24 10:09:53 +00:00
{
int need_softclock = 0;
1994-05-24 10:09:53 +00:00
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
hardclock_cpu(usermode);
tc_ticktock();
1994-05-24 10:09:53 +00:00
/*
* If no separate statistics clock is available, run it from here.
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
*
* XXX: this only works for UP
1994-05-24 10:09:53 +00:00
*/
if (stathz == 0) {
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
profclock(usermode, pc);
statclock(usermode);
}
1994-05-24 10:09:53 +00:00
Device Polling code for -current. Non-SMP, i386-only, no polling in the idle loop at the moment. To use this code you must compile a kernel with options DEVICE_POLLING and at runtime enable polling with sysctl kern.polling.enable=1 The percentage of CPU reserved to userland can be set with sysctl kern.polling.user_frac=NN (default is 50) while the remainder is used by polling device drivers and netisr's. These are the only two variables that you should need to touch. There are a few more parameters in kern.polling but the default values are adequate for all purposes. See the code in kern_poll.c for more details on them. Polling in the idle loop will be implemented shortly by introducing a kernel thread which does the job. Until then, the amount of CPU dedicated to polling will never exceed (100-user_frac). The equivalent (actually, better) code for -stable is at http://info.iet.unipi.it/~luigi/polling/ and also supports polling in the idle loop. NOTE to Alpha developers: There is really nothing in this code that is i386-specific. If you move the 2 lines supporting the new option from sys/conf/{files,options}.i386 to sys/conf/{files,options} I am pretty sure that this should work on the Alpha as well, just that I do not have a suitable test box to try it. If someone feels like trying it, I would appreciate it. NOTE to other developers: sure some things could be done better, and as always I am open to constructive criticism, which a few of you have already given and I greatly appreciated. However, before proposing radical architectural changes, please take some time to possibly try out this code, or at the very least read the comments in kern_poll.c, especially re. the reason why I am using a soft netisr and cannot (I believe) replace it with a simple timeout. Quick description of files touched by this commit: sys/conf/files.i386 new file kern/kern_poll.c sys/conf/options.i386 new option sys/i386/i386/trap.c poll in trap (disabled by default) sys/kern/kern_clock.c initialization and hardclock hooks. sys/kern/kern_intr.c minor swi_net changes sys/kern/kern_poll.c the bulk of the code. sys/net/if.h new flag sys/net/if_var.h declaration for functions used in device drivers. sys/net/netisr.h NETISR_POLL sys/dev/fxp/if_fxp.c sys/dev/fxp/if_fxpvar.h sys/pci/if_dc.c sys/pci/if_dcreg.h sys/pci/if_sis.c sys/pci/if_sisreg.h device driver modifications
2001-12-14 17:56:12 +00:00
#ifdef DEVICE_POLLING
hardclock_device_poll(); /* this is very short and quick */
Device Polling code for -current. Non-SMP, i386-only, no polling in the idle loop at the moment. To use this code you must compile a kernel with options DEVICE_POLLING and at runtime enable polling with sysctl kern.polling.enable=1 The percentage of CPU reserved to userland can be set with sysctl kern.polling.user_frac=NN (default is 50) while the remainder is used by polling device drivers and netisr's. These are the only two variables that you should need to touch. There are a few more parameters in kern.polling but the default values are adequate for all purposes. See the code in kern_poll.c for more details on them. Polling in the idle loop will be implemented shortly by introducing a kernel thread which does the job. Until then, the amount of CPU dedicated to polling will never exceed (100-user_frac). The equivalent (actually, better) code for -stable is at http://info.iet.unipi.it/~luigi/polling/ and also supports polling in the idle loop. NOTE to Alpha developers: There is really nothing in this code that is i386-specific. If you move the 2 lines supporting the new option from sys/conf/{files,options}.i386 to sys/conf/{files,options} I am pretty sure that this should work on the Alpha as well, just that I do not have a suitable test box to try it. If someone feels like trying it, I would appreciate it. NOTE to other developers: sure some things could be done better, and as always I am open to constructive criticism, which a few of you have already given and I greatly appreciated. However, before proposing radical architectural changes, please take some time to possibly try out this code, or at the very least read the comments in kern_poll.c, especially re. the reason why I am using a soft netisr and cannot (I believe) replace it with a simple timeout. Quick description of files touched by this commit: sys/conf/files.i386 new file kern/kern_poll.c sys/conf/options.i386 new option sys/i386/i386/trap.c poll in trap (disabled by default) sys/kern/kern_clock.c initialization and hardclock hooks. sys/kern/kern_intr.c minor swi_net changes sys/kern/kern_poll.c the bulk of the code. sys/net/if.h new flag sys/net/if_var.h declaration for functions used in device drivers. sys/net/netisr.h NETISR_POLL sys/dev/fxp/if_fxp.c sys/dev/fxp/if_fxpvar.h sys/pci/if_dc.c sys/pci/if_dcreg.h sys/pci/if_sis.c sys/pci/if_sisreg.h device driver modifications
2001-12-14 17:56:12 +00:00
#endif /* DEVICE_POLLING */
/*
* Process callouts at a very low cpu priority, so we don't keep the
* relatively high clock interrupt priority any longer than necessary.
*/
mtx_lock_spin_flags(&callout_lock, MTX_QUIET);
ticks++;
2006-06-14 03:14:26 +00:00
if (!TAILQ_EMPTY(&callwheel[ticks & callwheelmask])) {
need_softclock = 1;
} else if (softticks + 1 == ticks)
++softticks;
mtx_unlock_spin_flags(&callout_lock, MTX_QUIET);
/*
* swi_sched acquires sched_lock, so we don't want to call it with
* callout_lock held; incorrect locking order.
*/
if (need_softclock)
Change the preemption code for software interrupt thread schedules and mutex releases to not require flags for the cases when preemption is not allowed: The purpose of the MTX_NOSWITCH and SWI_NOSWITCH flags is to prevent switching to a higher priority thread on mutex releease and swi schedule, respectively when that switch is not safe. Now that the critical section API maintains a per-thread nesting count, the kernel can easily check whether or not it should switch without relying on flags from the programmer. This fixes a few bugs in that all current callers of swi_sched() used SWI_NOSWITCH, when in fact, only the ones called from fast interrupt handlers and the swi_sched of softclock needed this flag. Note that to ensure that swi_sched()'s in clock and fast interrupt handlers do not switch, these handlers have to be explicitly wrapped in critical_enter/exit pairs. Presently, just wrapping the handlers is sufficient, but in the future with the fully preemptive kernel, the interrupt must be EOI'd before critical_exit() is called. (critical_exit() can switch due to a deferred preemption in a fully preemptive kernel.) I've tested the changes to the interrupt code on i386 and alpha. I have not tested ia64, but the interrupt code is almost identical to the alpha code, so I expect it will work fine. PowerPC and ARM do not yet have interrupt code in the tree so they shouldn't be broken. Sparc64 is broken, but that's been ok'd by jake and tmm who will be fixing the interrupt code for sparc64 shortly. Reviewed by: peter Tested on: i386, alpha
2002-01-05 08:47:13 +00:00
swi_sched(softclock_ih, 0);
#ifdef SW_WATCHDOG
if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
watchdog_fire();
#endif /* SW_WATCHDOG */
init_main.c subr_autoconf.c: Add support for "interrupt driven configuration hooks". A component of the kernel can register a hook, most likely during auto-configuration, and receive a callback once interrupt services are available. This callback will occur before the root and dump devices are configured, so the configuration task can affect the selection of those two devices or complete any tasks that need to be performed prior to launching init. System boot is posponed so long as a hook is registered. The hook owner is responsible for removing the hook once their task is complete or the system boot can continue. kern_acct.c kern_clock.c kern_exit.c kern_synch.c kern_time.c: Change the interface and implementation for the kernel callout service. The new implemntaion is based on the work of Adam M. Costello and George Varghese, published in a technical report entitled "Redesigning the BSD Callout and Timer Facilities". The interface used in FreeBSD is a little different than the one outlined in the paper. The new function prototypes are: struct callout_handle timeout(void (*func)(void *), void *arg, int ticks); void untimeout(void (*func)(void *), void *arg, struct callout_handle handle); If a client wishes to remove a timeout, it must store the callout_handle returned by timeout and pass it to untimeout. The new implementation gives 0(1) insert and removal of callouts making this interface scale well even for applications that keep 100s of callouts outstanding. See the updated timeout.9 man page for more details.
1997-09-21 22:00:25 +00:00
}
1994-05-24 10:09:53 +00:00
/*
* Compute number of ticks in the specified amount of time.
1994-05-24 10:09:53 +00:00
*/
int
tvtohz(tv)
1994-05-24 10:09:53 +00:00
struct timeval *tv;
{
register unsigned long ticks;
register long sec, usec;
1994-05-24 10:09:53 +00:00
/*
* If the number of usecs in the whole seconds part of the time
* difference fits in a long, then the total number of usecs will
* fit in an unsigned long. Compute the total and convert it to
* ticks, rounding up and adding 1 to allow for the current tick
* to expire. Rounding also depends on unsigned long arithmetic
* to avoid overflow.
1994-05-24 10:09:53 +00:00
*
* Otherwise, if the number of ticks in the whole seconds part of
* the time difference fits in a long, then convert the parts to
* ticks separately and add, using similar rounding methods and
* overflow avoidance. This method would work in the previous
* case but it is slightly slower and assumes that hz is integral.
*
* Otherwise, round the time difference down to the maximum
* representable value.
*
* If ints have 32 bits, then the maximum value for any timeout in
* 10ms ticks is 248 days.
1994-05-24 10:09:53 +00:00
*/
sec = tv->tv_sec;
usec = tv->tv_usec;
if (usec < 0) {
sec--;
usec += 1000000;
}
if (sec < 0) {
#ifdef DIAGNOSTIC
if (usec > 0) {
sec++;
usec -= 1000000;
}
printf("tvotohz: negative time difference %ld sec %ld usec\n",
sec, usec);
#endif
ticks = 1;
} else if (sec <= LONG_MAX / 1000000)
ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1))
/ tick + 1;
else if (sec <= LONG_MAX / hz)
ticks = sec * hz
+ ((unsigned long)usec + (tick - 1)) / tick + 1;
else
ticks = LONG_MAX;
if (ticks > INT_MAX)
ticks = INT_MAX;
return ((int)ticks);
1994-05-24 10:09:53 +00:00
}
/*
* Start profiling on a process.
*
* Kernel profiling passes proc0 which never exits and hence
* keeps the profile clock running constantly.
*/
void
startprofclock(p)
register struct proc *p;
{
PROC_LOCK_ASSERT(p, MA_OWNED);
if (p->p_flag & P_STOPPROF)
return;
if ((p->p_flag & P_PROFIL) == 0) {
p->p_flag |= P_PROFIL;
mtx_lock_spin(&time_lock);
if (++profprocs == 1)
cpu_startprofclock();
mtx_unlock_spin(&time_lock);
1994-05-24 10:09:53 +00:00
}
}
/*
* Stop profiling on a process.
*/
void
stopprofclock(p)
register struct proc *p;
{
PROC_LOCK_ASSERT(p, MA_OWNED);
if (p->p_flag & P_PROFIL) {
if (p->p_profthreads != 0) {
p->p_flag |= P_STOPPROF;
while (p->p_profthreads != 0)
msleep(&p->p_profthreads, &p->p_mtx, PPAUSE,
"stopprof", 0);
p->p_flag &= ~P_STOPPROF;
}
if ((p->p_flag & P_PROFIL) == 0)
return;
p->p_flag &= ~P_PROFIL;
mtx_lock_spin(&time_lock);
if (--profprocs == 0)
cpu_stopprofclock();
mtx_unlock_spin(&time_lock);
1994-05-24 10:09:53 +00:00
}
}
/*
* Statistics clock. Updates rusage information and calls the scheduler
* to adjust priorities of the active thread.
*
* This should be called by all active processors.
1994-05-24 10:09:53 +00:00
*/
void
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
statclock(int usermode)
1994-05-24 10:09:53 +00:00
{
struct rusage *ru;
struct vmspace *vm;
struct thread *td;
struct proc *p;
long rss;
td = curthread;
p = td->td_proc;
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
if (usermode) {
1994-05-24 10:09:53 +00:00
/*
* Charge the time as appropriate.
1994-05-24 10:09:53 +00:00
*/
#ifdef KSE
if (p->p_flag & P_SA)
thread_statclock(1);
#endif
td->td_uticks++;
mtx_lock_spin_flags(&time_lock, MTX_QUIET);
if (p->p_nice > NZERO)
1994-05-24 10:09:53 +00:00
cp_time[CP_NICE]++;
else
cp_time[CP_USER]++;
} else {
/*
* Came from kernel mode, so we were:
* - handling an interrupt,
* - doing syscall or trap work on behalf of the current
* user process, or
* - spinning in the idle loop.
* Whichever it is, charge the time as appropriate.
* Note that we charge interrupts to the current process,
* regardless of whether they are ``for'' that process,
* so that we know how much of its real time was spent
* in ``non-process'' (i.e., interrupt) work.
*/
Reorganize the interrupt handling code a bit to make a few things cleaner and increase flexibility to allow various different approaches to be tried in the future. - Split struct ithd up into two pieces. struct intr_event holds the list of interrupt handlers associated with interrupt sources. struct intr_thread contains the data relative to an interrupt thread. Currently we still provide a 1:1 relationship of events to threads with the exception that events only have an associated thread if there is at least one threaded interrupt handler attached to the event. This means that on x86 we no longer have 4 bazillion interrupt threads with no handlers. It also means that interrupt events with only INTR_FAST handlers no longer have an associated thread either. - Renamed struct intrhand to struct intr_handler to follow the struct intr_foo naming convention. This did require renaming the powerpc MD struct intr_handler to struct ppc_intr_handler. - INTR_FAST no longer implies INTR_EXCL on all architectures except for powerpc. This means that multiple INTR_FAST handlers can attach to the same interrupt and that INTR_FAST and non-INTR_FAST handlers can attach to the same interrupt. Sharing INTR_FAST handlers may not always be desirable, but having sio(4) and uhci(4) fight over an IRQ isn't fun either. Drivers can always still use INTR_EXCL to ask for an interrupt exclusively. The way this sharing works is that when an interrupt comes in, all the INTR_FAST handlers are executed first, and if any threaded handlers exist, the interrupt thread is scheduled afterwards. This type of layout also makes it possible to investigate using interrupt filters ala OS X where the filter determines whether or not its companion threaded handler should run. - Aside from the INTR_FAST changes above, the impact on MD interrupt code is mostly just 's/ithread/intr_event/'. - A new MI ddb command 'show intrs' walks the list of interrupt events dumping their state. It also has a '/v' verbose switch which dumps info about all of the handlers attached to each event. - We currently don't destroy an interrupt thread when the last threaded handler is removed because it would suck for things like ppbus(8)'s braindead behavior. The code is present, though, it is just under #if 0 for now. - Move the code to actually execute the threaded handlers for an interrrupt event into a separate function so that ithread_loop() becomes more readable. Previously this code was all in the middle of ithread_loop() and indented halfway across the screen. - Made struct intr_thread private to kern_intr.c and replaced td_ithd with a thread private flag TDP_ITHREAD. - In statclock, check curthread against idlethread directly rather than curthread's proc against idlethread's proc. (Not really related to intr changes) Tested on: alpha, amd64, i386, sparc64 Tested on: arm, ia64 (older version of patch by cognet and marcel)
2005-10-25 19:48:48 +00:00
if ((td->td_pflags & TDP_ITHREAD) ||
td->td_intr_nesting_level >= 2) {
td->td_iticks++;
mtx_lock_spin_flags(&time_lock, MTX_QUIET);
1994-05-24 10:09:53 +00:00
cp_time[CP_INTR]++;
} else {
#ifdef KSE
if (p->p_flag & P_SA)
thread_statclock(0);
#endif
td->td_pticks++;
td->td_sticks++;
mtx_lock_spin_flags(&time_lock, MTX_QUIET);
if (!TD_IS_IDLETHREAD(td))
cp_time[CP_SYS]++;
else
cp_time[CP_IDLE]++;
}
1994-05-24 10:09:53 +00:00
}
mtx_unlock_spin_flags(&time_lock, MTX_QUIET);
2004-12-26 00:14:21 +00:00
CTR4(KTR_SCHED, "statclock: %p(%s) prio %d stathz %d",
td, td->td_proc->p_comm, td->td_priority, (stathz)?stathz:hz);
1994-05-24 10:09:53 +00:00
mtx_lock_spin_flags(&sched_lock, MTX_QUIET);
sched_clock(td);
/* Update resource usage integrals and maximums. */
MPASS(p->p_vmspace != NULL);
vm = p->p_vmspace;
ru = &td->td_ru;
ru->ru_ixrss += pgtok(vm->vm_tsize);
ru->ru_idrss += pgtok(vm->vm_dsize);
ru->ru_isrss += pgtok(vm->vm_ssize);
rss = pgtok(vmspace_resident_count(vm));
if (ru->ru_maxrss < rss)
ru->ru_maxrss = rss;
mtx_unlock_spin_flags(&sched_lock, MTX_QUIET);
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
}
- Change fast interrupts on x86 to push a full interrupt frame and to return through doreti to handle ast's. This is necessary for the clock interrupts to work properly. - Change the clock interrupts on the x86 to be fast instead of threaded. This is needed because both hardclock() and statclock() need to run in the context of the current process, not in a separate thread context. - Kill the prevproc hack as it is no longer needed. - We really need Giant when we call psignal(), but we don't want to block during the clock interrupt. Instead, use two p_flag's in the proc struct to mark the current process as having a pending SIGVTALRM or a SIGPROF and let them be delivered during ast() when hardclock() has finished running. - Remove CLKF_BASEPRI, which was #ifdef'd out on the x86 anyways. It was broken on the x86 if it was turned on since cpl is gone. It's only use was to bogusly run softclock() directly during hardclock() rather than scheduling an SWI. - Remove the COM_LOCK simplelock and replace it with a clock_lock spin mutex. Since the spin mutex already handles disabling/restoring interrupts appropriately, this also lets us axe all the *_intr() fu. - Back out the hacks in the APIC_IO x86 cpu_initclocks() code to use temporary fast interrupts for the APIC trial. - Add two new process flags P_ALRMPEND and P_PROFPEND to mark the pending signals in hardclock() that are to be delivered in ast(). Submitted by: jakeb (making statclock safe in a fast interrupt) Submitted by: cp (concept of delaying signals until ast())
2000-10-06 02:20:21 +00:00
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
void
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
profclock(int usermode, uintfptr_t pc)
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
{
struct thread *td;
#ifdef GPROF
struct gmonparam *g;
uintfptr_t i;
#endif
Overhaul of the SMP code. Several portions of the SMP kernel support have been made machine independent and various other adjustments have been made to support Alpha SMP. - It splits the per-process portions of hardclock() and statclock() off into hardclock_process() and statclock_process() respectively. hardclock() and statclock() call the *_process() functions for the current process so that UP systems will run as before. For SMP systems, it is simply necessary to ensure that all other processors execute the *_process() functions when the main clock functions are triggered on one CPU by an interrupt. For the alpha 4100, clock interrupts are delievered in a staggered broadcast fashion, so we simply call hardclock/statclock on the boot CPU and call the *_process() functions on the secondaries. For x86, we call statclock and hardclock as usual and then call forward_hardclock/statclock in the MD code to send an IPI to cause the AP's to execute forwared_hardclock/statclock which then call the *_process() functions. - forward_signal() and forward_roundrobin() have been reworked to be MI and to involve less hackery. Now the cpu doing the forward sets any flags, etc. and sends a very simple IPI_AST to the other cpu(s). AST IPIs now just basically return so that they can execute ast() and don't bother with setting the astpending or needresched flags themselves. This also removes the loop in forward_signal() as sched_lock closes the race condition that the loop worked around. - need_resched(), resched_wanted() and clear_resched() have been changed to take a process to act on rather than assuming curproc so that they can be used to implement forward_roundrobin() as described above. - Various other SMP variables have been moved to a MI subr_smp.c and a new header sys/smp.h declares MI SMP variables and API's. The IPI API's from machine/ipl.h have moved to machine/smp.h which is included by sys/smp.h. - The globaldata_register() and globaldata_find() functions as well as the SLIST of globaldata structures has become MI and moved into subr_smp.c. Also, the globaldata list is only available if SMP support is compiled in. Reviewed by: jake, peter Looked over by: eivind
2001-04-27 19:28:25 +00:00
td = curthread;
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
if (usermode) {
/*
* Came from user mode; CPU was in user state.
* If this process is being profiled, record the tick.
* if there is no related user location yet, don't
* bother trying to count it.
*/
if (td->td_proc->p_flag & P_PROFIL)
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
addupc_intr(td, pc, 1);
}
#ifdef GPROF
else {
/*
* Kernel statistics are just like addupc_intr, only easier.
*/
g = &_gmonparam;
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
if (g->state == GMON_PROF_ON && pc >= g->lowpc) {
i = PC_TO_I(g, pc);
if (i < g->textsize) {
Tweak how the MD code calls the fooclock() methods some. Instead of passing a pointer to an opaque clockframe structure and requiring the MD code to supply CLKF_FOO() macros to extract needed values out of the opaque structure, just pass the needed values directly. In practice this means passing the pair (usermode, pc) to hardclock() and profclock() and passing the boolean (usermode) to hardclock_cpu() and hardclock_process(). Other details: - Axe clockframe and CLKF_FOO() macros on all architectures. Basically, all the archs were taking a trapframe and converting it into a clockframe one way or another. Now they can just extract the PC and usermode values directly out of the trapframe and pass it to fooclock(). - Renamed hardclock_process() to hardclock_cpu() as the latter is more accurate. - On Alpha, we now run profclock() at hz (profhz == hz) rather than at the slower stathz. - On Alpha, for the TurboLaser machines that don't have an 8254 timecounter, call hardclock() directly. This removes an extra conditional check from every clock interrupt on Alpha on the BSP. There is probably room for even further pruning here by changing Alpha to use the simplified timecounter we use on x86 with the lapic timer since we don't get interrupts from the 8254 on Alpha anyway. - On x86, clkintr() shouldn't ever be called now unless using_lapic_timer is false, so add a KASSERT() to that affect and remove a condition to slightly optimize the non-lapic case. - Change prototypeof arm_handler_execute() so that it's first arg is a trapframe pointer rather than a void pointer for clarity. - Use KCOUNT macro in profclock() to lookup the kernel profiling bucket. Tested on: alpha, amd64, arm, i386, ia64, sparc64 Reviewed by: bde (mostly)
2005-12-22 22:16:09 +00:00
KCOUNT(g, i)++;
}
}
}
#endif
1994-05-24 10:09:53 +00:00
}
/*
* Return information about system clocks.
*/
static int
sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
1994-05-24 10:09:53 +00:00
{
struct clockinfo clkinfo;
/*
* Construct clockinfo structure.
*/
bzero(&clkinfo, sizeof(clkinfo));
1994-05-24 10:09:53 +00:00
clkinfo.hz = hz;
clkinfo.tick = tick;
clkinfo.profhz = profhz;
clkinfo.stathz = stathz ? stathz : hz;
return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req));
1994-05-24 10:09:53 +00:00
}
SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
0, 0, sysctl_kern_clockrate, "S,clockinfo",
"Rate and period of various kernel clocks");
#ifdef SW_WATCHDOG
static void
watchdog_config(void *unused __unused, u_int cmd, int *error)
{
u_int u;
u = cmd & WD_INTERVAL;
if (u >= WD_TO_1SEC) {
watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
watchdog_enabled = 1;
*error = 0;
} else {
watchdog_enabled = 0;
}
}
/*
* Handle a watchdog timeout by dumping interrupt information and
* then either dropping to DDB or panicking.
*/
static void
watchdog_fire(void)
{
int nintr;
u_int64_t inttotal;
u_long *curintr;
char *curname;
curintr = intrcnt;
curname = intrnames;
inttotal = 0;
nintr = eintrcnt - intrcnt;
2006-04-17 20:14:51 +00:00
printf("interrupt total\n");
while (--nintr >= 0) {
if (*curintr)
printf("%-12s %20lu\n", curname, *curintr);
curname += strlen(curname) + 1;
inttotal += *curintr++;
}
printf("Total %20ju\n", (uintmax_t)inttotal);
#if defined(KDB) && !defined(KDB_UNATTENDED)
kdb_backtrace();
kdb_enter("watchdog timeout");
#else
panic("watchdog timeout");
#endif
}
#endif /* SW_WATCHDOG */