freebsd-dev/sys/kern/sched_core.c
Julian Elischer 486a941418 Instead of doing comparisons using the pcpu area to see if
a thread is an idle thread, just see if it has the IDLETD
flag set. That flag will probably move to the pflags word
as it's permanent and never changes for the life of the
system so it doesn't need locking.
2007-03-08 06:44:34 +00:00

/*-
* Copyright (c) 2005-2006, David Xu <yfxu@corp.netease.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_sched.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/turnstile.h>
#include <sys/umtx.h>
#include <sys/unistd.h>
#include <sys/vmmeter.h>
#ifdef KTRACE
#include <sys/uio.h>
#include <sys/ktrace.h>
#endif
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif
#include <machine/cpu.h>
#include <machine/smp.h>
/* get the process's nice value; nice 20 is not supported and is clamped to 19 */
#define PROC_NICE(p) MIN((p)->p_nice, 19)
/* convert nice to kernel thread priority */
#define NICE_TO_PRI(nice) (PUSER + 20 + (nice))
/* get process's static priority */
#define PROC_PRI(p) NICE_TO_PRI(PROC_NICE(p))
/* convert kernel thread priority to user priority */
#define USER_PRI(pri) MIN((pri) - PUSER, 39)
/* convert nice value to user priority */
#define PROC_USER_PRI(p) (PROC_NICE(p) + 20)
/* maximum user priority, highest prio + 1 */
#define MAX_USER_PRI 40
/* maximum kernel timeshare priority, i.e. the priority of nice 19 */
#define PUSER_MAX (PUSER + 39)
/* ticks and nanosecond converters */
#define NS_TO_HZ(n) ((n) / (1000000000 / hz))
#define HZ_TO_NS(h) ((h) * (1000000000 / hz))
/* ticks and microsecond converters */
#define MS_TO_HZ(m) ((m) / (1000000 / hz))
#define PRI_SCORE_RATIO 25
#define MAX_SCORE (MAX_USER_PRI * PRI_SCORE_RATIO / 100)
#define MAX_SLEEP_TIME (def_timeslice * MAX_SCORE)
#define NS_MAX_SLEEP_TIME (HZ_TO_NS(MAX_SLEEP_TIME))
#define STARVATION_TIME (MAX_SLEEP_TIME)
#define CURRENT_SCORE(ts) \
(MAX_SCORE * NS_TO_HZ((ts)->ts_slptime) / MAX_SLEEP_TIME)
#define SCALE_USER_PRI(x, upri) \
MAX(x * (upri + 1) / (MAX_USER_PRI/2), min_timeslice)
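/*
* Worked example of the scoring macros above.  The concrete numbers
* assume hz = 1000, so that sched_setup() computes def_timeslice = 100
* and min_timeslice = 5 ticks:
*
*   MAX_SCORE               = 40 * 25 / 100 = 10
*   MAX_SLEEP_TIME          = 100 * 10 = 1000 ticks (one second)
*   NS_TO_HZ(2000000)       = 2000000 / 1000000 = 2 ticks
*   CURRENT_SCORE(ts)       = 10 * slept_ticks / 1000, so a thread with
*                             500 ticks of banked sleep time scores 5.
*   SCALE_USER_PRI(100, 20) = MAX(100 * 21 / 20, 5) = 105 ticks
*/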
/*
* For a thread whose nice is zero, the score is used to determine
* if it is an interactive thread.
*/
#define INTERACTIVE_BASE_SCORE (MAX_SCORE * 20)/100
/*
* Calculate the score a thread must reach to be considered an
* interactive thread.
*/
#define INTERACTIVE_SCORE(ts) \
(PROC_NICE((ts)->ts_proc) * MAX_SCORE / 40 + INTERACTIVE_BASE_SCORE)
/* Test if a thread is an interactive thread */
#define THREAD_IS_INTERACTIVE(ts) \
((ts)->ts_thread->td_user_pri <= \
PROC_PRI((ts)->ts_proc) - INTERACTIVE_SCORE(ts))
/*
* Calculate how long a thread must sleep for the sleep to count
* as an interactive sleep.
*/
#define INTERACTIVE_SLEEP_TIME(ts) \
(HZ_TO_NS(MAX_SLEEP_TIME * \
(MAX_SCORE / 2 + INTERACTIVE_SCORE((ts)) + 1) / MAX_SCORE - 1))
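/*
* Rough numbers for the interactivity test, again assuming hz = 1000
* (MAX_SCORE = 10, def_timeslice = 100):
*
*   INTERACTIVE_BASE_SCORE = 10 * 20 / 100 = 2
*   INTERACTIVE_SCORE      = 2 for a nice-0 thread, so such a thread is
*                            considered interactive once its dynamic
*                            user priority is at least 2 below its
*                            static PROC_PRI().
*   INTERACTIVE_SLEEP_TIME = HZ_TO_NS(1000 * (5 + 2 + 1) / 10 - 1)
*                          = HZ_TO_NS(799), roughly 0.8 seconds of sleep.
*/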
#define CHILD_WEIGHT 90
#define PARENT_WEIGHT 90
#define EXIT_WEIGHT 3
#define SCHED_LOAD_SCALE 128UL
#define IDLE 0
#define IDLE_IDLE 1
#define NOT_IDLE 2
#define KQB_LEN (8) /* Number of priority status words. */
#define KQB_L2BPW (5) /* Log2(sizeof(kqb_word_t) * NBBY). */
#define KQB_BPW (1<<KQB_L2BPW) /* Bits in a kqb_word_t. */
#define KQB_BIT(pri) (1 << ((pri) & (KQB_BPW - 1)))
#define KQB_WORD(pri) ((pri) >> KQB_L2BPW)
#define KQB_FFS(word) (ffs(word) - 1)
#define KQ_NQS 256
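/*
* The KQB_* macros implement a 256-bit bitmap, one bit per run queue,
* stored as eight 32-bit words.  For example, priority 146 lives in word
* KQB_WORD(146) = 146 >> 5 = 4 and is set/tested with
* KQB_BIT(146) = 1 << (146 & 31) = 1 << 18.
*/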
/*
* Type of run queue status word.
*/
typedef u_int32_t kqb_word_t;
/*
* Head of run queues.
*/
TAILQ_HEAD(krqhead, td_sched);
/*
* Bit array which maintains the status of a run queue. When a queue is
* non-empty the bit corresponding to the queue number will be set.
*/
struct krqbits {
kqb_word_t rqb_bits[KQB_LEN];
};
/*
* Run queue structure. Contains an array of run queues on which processes
* are placed, and a structure to maintain the status of each queue.
*/
struct krunq {
struct krqbits rq_status;
struct krqhead rq_queues[KQ_NQS];
};
/*
* The following datastructures are allocated within their parent structure
* but are scheduler specific.
*/
/*
* The schedulable entity that can be given a context to run. A process may
* have several of these.
*/
struct td_sched {
struct thread *ts_thread; /* (*) Active associated thread. */
TAILQ_ENTRY(td_sched) ts_procq; /* (j/z) Run queue. */
int ts_flags; /* (j) TSF_* flags. */
fixpt_t ts_pctcpu; /* (j) %cpu during p_swtime. */
u_char ts_rqindex; /* (j) Run queue index. */
int ts_slice; /* Time slice in ticks */
struct kseq *ts_kseq; /* Kseq the thread belongs to */
struct krunq *ts_runq; /* Associated run queue */
#ifdef SMP
int ts_cpu; /* CPU that we have affinity for. */
int ts_wakeup_cpu; /* CPU that has activated us. */
#endif
int ts_activated; /* How is the thread activated. */
uint64_t ts_timestamp; /* Last timestamp dependent on state.*/
unsigned ts_lastran; /* Last timestamp the thread ran. */
/* The following variables are only used for pctcpu calculation */
int ts_ltick; /* Last tick that we were running on */
int ts_ftick; /* First tick that we were running on */
int ts_ticks; /* Tick count */
u_long ts_slptime; /* (j) Accumulated sleep time, in nanoseconds. */
u_long ts_runtime; /* (j) Run time not yet committed, in nanoseconds. */
};
#define td_sched td_sched
#define ts_proc ts_thread->td_proc
/* flags kept in ts_flags */
#define TSF_BOUND 0x0001 /* Thread can not migrate. */
#define TSF_PREEMPTED 0x0002 /* Thread was preempted. */
#define TSF_MIGRATING 0x0004 /* Thread is migrating. */
#define TSF_SLEEP 0x0008 /* Thread did sleep. */
#define TSF_DIDRUN 0x0010 /* Thread actually ran. */
#define TSF_EXIT 0x0020 /* Thread is being killed. */
#define TSF_NEXTRQ 0x0400 /* Thread should be in next queue. */
#define TSF_FIRST_SLICE 0x0800 /* Thread has first time slice left. */
/*
* Cpu percentage computation macros and defines.
*
* SCHED_CPU_TIME: Number of seconds to average the cpu usage across.
* SCHED_CPU_TICKS: Number of hz ticks to average the cpu usage across.
*/
#define SCHED_CPU_TIME 10
#define SCHED_CPU_TICKS (hz * SCHED_CPU_TIME)
/*
* kseq - per processor runqs and statistics.
*/
struct kseq {
struct krunq *ksq_curr; /* Current queue. */
struct krunq *ksq_next; /* Next timeshare queue. */
struct krunq ksq_timeshare[2]; /* Run queues for !IDLE. */
struct krunq ksq_idle; /* Queue of IDLE threads. */
int ksq_load;
uint64_t ksq_last_timestamp; /* Per-cpu last clock tick */
unsigned ksq_expired_tick; /* First expired tick */
signed char ksq_expired_nice; /* Lowest nice in nextq */
};
static struct td_sched kse0;
static int min_timeslice = 5;
static int def_timeslice = 100;
static int granularity = 10;
static int realstathz;
static int sched_tdcnt;
static struct kseq kseq_global;
/*
* One td_sched queue per processor.
*/
#ifdef SMP
static struct kseq kseq_cpu[MAXCPU];
#define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x) (&kseq_cpu[(x)])
#define KSEQ_ID(x) ((x) - kseq_cpu)
static cpumask_t cpu_sibling[MAXCPU];
#else /* !SMP */
#define KSEQ_SELF() (&kseq_global)
#define KSEQ_CPU(x) (&kseq_global)
#endif
/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
static fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
SYSCTL_INT(_kern, OID_AUTO, ccpu, CTLFLAG_RD, &ccpu, 0, "");
static void sched_setup(void *dummy);
SYSINIT(sched_setup, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, sched_setup, NULL);
static void sched_initticks(void *dummy);
SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL);
static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0, "Scheduler");
SYSCTL_STRING(_kern_sched, OID_AUTO, name, CTLFLAG_RD, "CORE", 0,
"Scheduler name");
#ifdef SMP
/* Enable forwarding of wakeups to all other cpus */
SYSCTL_NODE(_kern_sched, OID_AUTO, ipiwakeup, CTLFLAG_RD, NULL, "Kernel SMP");
static int runq_fuzz = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
static int forward_wakeup_enabled = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, enabled, CTLFLAG_RW,
&forward_wakeup_enabled, 0,
"Forwarding of wakeup to idle CPUs");
static int forward_wakeups_requested = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, requested, CTLFLAG_RD,
&forward_wakeups_requested, 0,
"Requests for Forwarding of wakeup to idle CPUs");
static int forward_wakeups_delivered = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, delivered, CTLFLAG_RD,
&forward_wakeups_delivered, 0,
"Completed Forwarding of wakeup to idle CPUs");
static int forward_wakeup_use_mask = 1;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, usemask, CTLFLAG_RW,
&forward_wakeup_use_mask, 0,
"Use the mask of idle cpus");
static int forward_wakeup_use_loop = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, useloop, CTLFLAG_RW,
&forward_wakeup_use_loop, 0,
"Use a loop to find idle cpus");
static int forward_wakeup_use_single = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, onecpu, CTLFLAG_RW,
&forward_wakeup_use_single, 0,
"Only signal one idle cpu");
static int forward_wakeup_use_htt = 0;
SYSCTL_INT(_kern_sched_ipiwakeup, OID_AUTO, htt2, CTLFLAG_RW,
&forward_wakeup_use_htt, 0,
"account for htt");
#endif
static void krunq_add(struct krunq *, struct td_sched *);
static struct td_sched *krunq_choose(struct krunq *);
static void krunq_clrbit(struct krunq *rq, int pri);
static int krunq_findbit(struct krunq *rq);
static void krunq_init(struct krunq *);
static void krunq_remove(struct krunq *, struct td_sched *);
static struct td_sched * kseq_choose(struct kseq *);
static void kseq_load_add(struct kseq *, struct td_sched *);
static void kseq_load_rem(struct kseq *, struct td_sched *);
static void kseq_runq_add(struct kseq *, struct td_sched *);
static void kseq_runq_rem(struct kseq *, struct td_sched *);
static void kseq_setup(struct kseq *);
static int sched_is_timeshare(struct thread *td);
static int sched_calc_pri(struct td_sched *ts);
static int sched_starving(struct kseq *, unsigned, struct td_sched *);
static void sched_pctcpu_update(struct td_sched *);
static void sched_thread_priority(struct thread *, u_char);
static uint64_t sched_timestamp(void);
static int sched_recalc_pri(struct td_sched *ts, uint64_t now);
static int sched_timeslice(struct td_sched *ts);
static void sched_update_runtime(struct td_sched *ts, uint64_t now);
static void sched_commit_runtime(struct td_sched *ts);
/*
* Initialize a run structure.
*/
static void
krunq_init(struct krunq *rq)
{
int i;
bzero(rq, sizeof *rq);
for (i = 0; i < KQ_NQS; i++)
TAILQ_INIT(&rq->rq_queues[i]);
}
/*
* Clear the status bit of the queue corresponding to priority level pri,
* indicating that it is empty.
*/
static inline void
krunq_clrbit(struct krunq *rq, int pri)
{
struct krqbits *rqb;
rqb = &rq->rq_status;
rqb->rqb_bits[KQB_WORD(pri)] &= ~KQB_BIT(pri);
}
/*
* Find the index of the first non-empty run queue. This is done by
* scanning the status bits, a set bit indicates a non-empty queue.
*/
static int
krunq_findbit(struct krunq *rq)
{
struct krqbits *rqb;
int pri;
int i;
rqb = &rq->rq_status;
for (i = 0; i < KQB_LEN; i++) {
if (rqb->rqb_bits[i]) {
pri = KQB_FFS(rqb->rqb_bits[i]) + (i << KQB_L2BPW);
return (pri);
}
}
return (-1);
}
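/*
* Example: with rqb_bits[0] == 0 and rqb_bits[1] == 0x00040000 the loop
* stops at word 1 and returns 18 + (1 << KQB_L2BPW) = 18 + 32 = 50, the
* lowest-numbered (i.e. best-priority) non-empty queue.
*/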
static int
krunq_check(struct krunq *rq)
{
struct krqbits *rqb;
int i;
rqb = &rq->rq_status;
for (i = 0; i < KQB_LEN; i++) {
if (rqb->rqb_bits[i])
return (1);
}
return (0);
}
/*
* Set the status bit of the queue corresponding to priority level pri,
* indicating that it is non-empty.
*/
static inline void
krunq_setbit(struct krunq *rq, int pri)
{
struct krqbits *rqb;
rqb = &rq->rq_status;
rqb->rqb_bits[KQB_WORD(pri)] |= KQB_BIT(pri);
}
/*
* Add the KSE to the queue specified by its priority, and set the
* corresponding status bit.
*/
static void
krunq_add(struct krunq *rq, struct td_sched *ts)
{
struct krqhead *rqh;
int pri;
pri = ts->ts_thread->td_priority;
ts->ts_rqindex = pri;
krunq_setbit(rq, pri);
rqh = &rq->rq_queues[pri];
if (ts->ts_flags & TSF_PREEMPTED)
TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
else
TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
}
/*
* Find the highest priority process on the run queue.
*/
static struct td_sched *
krunq_choose(struct krunq *rq)
{
struct krqhead *rqh;
struct td_sched *ts;
int pri;
mtx_assert(&sched_lock, MA_OWNED);
if ((pri = krunq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
ts = TAILQ_FIRST(rqh);
KASSERT(ts != NULL, ("krunq_choose: no thread on busy queue"));
#ifdef SMP
if (pri <= PRI_MAX_ITHD || runq_fuzz <= 0)
return (ts);
/*
* In the first couple of entries, check if
* there is one for our CPU as a preference.
*/
struct td_sched *ts2 = ts;
const int mycpu = PCPU_GET(cpuid);
const int mymask = 1 << mycpu;
int count = runq_fuzz;
while (count-- && ts2) {
const int cpu = ts2->ts_wakeup_cpu;
if (cpu_sibling[cpu] & mymask) {
ts = ts2;
break;
}
ts2 = TAILQ_NEXT(ts2, ts_procq);
}
#endif
return (ts);
}
return (NULL);
}
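/*
* The runq_fuzz scan above is a cache-affinity heuristic: rather than
* always taking the head of the queue, the first few entries are checked
* for a thread whose last wakeup CPU (ts_wakeup_cpu) is the current CPU
* or one of its topology siblings, and such a thread is preferred.
* Queues at interrupt-thread priority or better are exempt, so their
* order is never fuzzed.
*/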
/*
* Remove the KSE from the queue specified by its priority, and clear the
* corresponding status bit if the queue becomes empty.
* Caller must set state afterwards.
*/
static void
krunq_remove(struct krunq *rq, struct td_sched *ts)
{
struct krqhead *rqh;
int pri;
KASSERT(ts->ts_proc->p_sflag & PS_INMEM,
("runq_remove: process swapped out"));
pri = ts->ts_rqindex;
rqh = &rq->rq_queues[pri];
KASSERT(ts != NULL, ("krunq_remove: no proc on busy queue"));
TAILQ_REMOVE(rqh, ts, ts_procq);
if (TAILQ_EMPTY(rqh))
krunq_clrbit(rq, pri);
}
static inline void
kseq_runq_add(struct kseq *kseq, struct td_sched *ts)
{
krunq_add(ts->ts_runq, ts);
ts->ts_kseq = kseq;
}
static inline void
kseq_runq_rem(struct kseq *kseq, struct td_sched *ts)
{
krunq_remove(ts->ts_runq, ts);
ts->ts_kseq = NULL;
ts->ts_runq = NULL;
}
static inline void
kseq_load_add(struct kseq *kseq, struct td_sched *ts)
{
kseq->ksq_load++;
if ((ts->ts_proc->p_flag & P_NOLOAD) == 0)
sched_tdcnt++;
}
static inline void
kseq_load_rem(struct kseq *kseq, struct td_sched *ts)
{
kseq->ksq_load--;
if ((ts->ts_proc->p_flag & P_NOLOAD) == 0)
sched_tdcnt--;
}
/*
* Pick the highest priority task we have and return it.
*/
static struct td_sched *
kseq_choose(struct kseq *kseq)
{
struct krunq *swap;
struct td_sched *ts;
mtx_assert(&sched_lock, MA_OWNED);
ts = krunq_choose(kseq->ksq_curr);
if (ts != NULL)
return (ts);
kseq->ksq_expired_nice = PRIO_MAX + 1;
kseq->ksq_expired_tick = 0;
swap = kseq->ksq_curr;
kseq->ksq_curr = kseq->ksq_next;
kseq->ksq_next = swap;
ts = krunq_choose(kseq->ksq_curr);
if (ts != NULL)
return (ts);
return krunq_choose(&kseq->ksq_idle);
}
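/*
* Lookup order above: ksq_curr is always searched first.  Only when the
* current timeshare array is empty are the two arrays swapped (and the
* expired-queue bookkeeping reset), after which the former "next" array
* is searched; the idle queue is consulted last.  CPU-bound threads that
* used up their slice are moved to ksq_next via TSF_NEXTRQ, so the swap
* is what eventually lets them run again.
*/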
static inline uint64_t
sched_timestamp(void)
{
uint64_t now = cputick2usec(cpu_ticks()) * 1000;
return (now);
}
static inline int
sched_timeslice(struct td_sched *ts)
{
struct proc *p = ts->ts_proc;
if (ts->ts_proc->p_nice < 0)
return SCALE_USER_PRI(def_timeslice*4, PROC_USER_PRI(p));
else
return SCALE_USER_PRI(def_timeslice, PROC_USER_PRI(p));
}
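/*
* Slice lengths produced by the scaling above, assuming hz = 1000
* (def_timeslice = 100, min_timeslice = 5):
*
*   nice  0: SCALE_USER_PRI(100, 20) = 105 ticks
*   nice 19: SCALE_USER_PRI(100, 39) = 200 ticks
*
* Threads with a negative nice value are scaled from the larger
* 4 * def_timeslice base instead.
*/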
static inline int
sched_is_timeshare(struct thread *td)
{
return (td->td_pri_class == PRI_TIMESHARE);
}
static int
sched_calc_pri(struct td_sched *ts)
{
int score, pri;
if (sched_is_timeshare(ts->ts_thread)) {
score = CURRENT_SCORE(ts) - MAX_SCORE / 2;
pri = PROC_PRI(ts->ts_proc) - score;
if (pri < PUSER)
pri = PUSER;
else if (pri > PUSER_MAX)
pri = PUSER_MAX;
return (pri);
}
return (ts->ts_thread->td_base_user_pri);
}
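/*
* Example of the calculation above: with MAX_SCORE = 10 a timeshare
* thread's priority floats within +/- MAX_SCORE / 2 = 5 of its static
* PROC_PRI().  A nice-0 thread with a full sleep score of 10 gets
* PROC_PRI() - (10 - 5) = PUSER + 15, while one that never sleeps gets
* PROC_PRI() - (0 - 5) = PUSER + 25; the result is clamped to the
* [PUSER, PUSER_MAX] range.
*/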
static int
sched_recalc_pri(struct td_sched *ts, uint64_t now)
{
uint64_t delta;
unsigned int sleep_time;
delta = now - ts->ts_timestamp;
if (__predict_false(!sched_is_timeshare(ts->ts_thread)))
return (ts->ts_thread->td_base_user_pri);
if (delta > NS_MAX_SLEEP_TIME)
sleep_time = NS_MAX_SLEEP_TIME;
else
sleep_time = (unsigned int)delta;
if (__predict_false(sleep_time == 0))
goto out;
if (ts->ts_activated != -1 &&
sleep_time > INTERACTIVE_SLEEP_TIME(ts)) {
ts->ts_slptime = HZ_TO_NS(MAX_SLEEP_TIME - def_timeslice);
} else {
sleep_time *= (MAX_SCORE - CURRENT_SCORE(ts)) ? : 1;
/*
* If the thread is waking from an uninterruptible sleep, the
* sleep is unlikely to be interactive; limit its sleep time to
* prevent it from being treated as an interactive thread.
*/
if (ts->ts_activated == -1) {
if (ts->ts_slptime >= INTERACTIVE_SLEEP_TIME(ts))
sleep_time = 0;
else if (ts->ts_slptime + sleep_time >=
INTERACTIVE_SLEEP_TIME(ts)) {
ts->ts_slptime = INTERACTIVE_SLEEP_TIME(ts);
sleep_time = 0;
}
}
/*
* Thread gets priority boost here.
*/
ts->ts_slptime += sleep_time;
/* Sleep time should never be larger than maximum */
if (ts->ts_slptime > NS_MAX_SLEEP_TIME)
ts->ts_slptime = NS_MAX_SLEEP_TIME;
}
out:
return (sched_calc_pri(ts));
}
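/*
* Sketch of the boost logic above: the time a thread just spent off the
* CPU is multiplied by (MAX_SCORE - CURRENT_SCORE), so threads that are
* not yet interactive accumulate ts_slptime faster.  A long sleep by an
* interruptible sleeper sets ts_slptime close to the maximum outright,
* while uninterruptible sleepers (ts_activated == -1) have their credit
* capped at INTERACTIVE_SLEEP_TIME(), so sleep time alone cannot push
* them over the interactivity threshold.
*/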
static void
sched_update_runtime(struct td_sched *ts, uint64_t now)
{
uint64_t runtime;
if (sched_is_timeshare(ts->ts_thread)) {
if ((int64_t)(now - ts->ts_timestamp) < NS_MAX_SLEEP_TIME) {
runtime = now - ts->ts_timestamp;
if ((int64_t)(now - ts->ts_timestamp) < 0)
runtime = 0;
} else {
runtime = NS_MAX_SLEEP_TIME;
}
runtime /= (CURRENT_SCORE(ts) ? : 1);
ts->ts_runtime += runtime;
ts->ts_timestamp = now;
}
}
static void
sched_commit_runtime(struct td_sched *ts)
{
if (ts->ts_runtime > ts->ts_slptime)
ts->ts_slptime = 0;
else
ts->ts_slptime -= ts->ts_runtime;
ts->ts_runtime = 0;
}
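/*
* Bookkeeping example for the two helpers above: sched_update_runtime()
* charges CPU time against the thread, divided by CURRENT_SCORE(), so a
* thread with a score of 5 that runs for 10 ms accrues only 2 ms of
* ts_runtime.  sched_commit_runtime() then pays that debt out of
* ts_slptime, so sustained running gradually erodes the sleep credit
* that made the thread interactive.
*/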
static void
kseq_setup(struct kseq *kseq)
{
krunq_init(&kseq->ksq_timeshare[0]);
krunq_init(&kseq->ksq_timeshare[1]);
krunq_init(&kseq->ksq_idle);
kseq->ksq_curr = &kseq->ksq_timeshare[0];
kseq->ksq_next = &kseq->ksq_timeshare[1];
kseq->ksq_expired_nice = PRIO_MAX + 1;
kseq->ksq_expired_tick = 0;
}
static void
sched_setup(void *dummy)
{
#ifdef SMP
int i;
#endif
/*
* To avoid divide-by-zero, set realstathz to a dummy value here
* in case sched_clock() is called before sched_initticks().
*/
realstathz = hz;
min_timeslice = MAX(5 * hz / 1000, 1);
def_timeslice = MAX(100 * hz / 1000, 1);
granularity = MAX(10 * hz / 1000, 1);
kseq_setup(&kseq_global);
#ifdef SMP
runq_fuzz = MIN(mp_ncpus * 2, 8);
/*
* Initialize the kseqs.
*/
for (i = 0; i < MAXCPU; i++) {
struct kseq *ksq;
ksq = &kseq_cpu[i];
kseq_setup(&kseq_cpu[i]);
cpu_sibling[i] = 1 << i;
}
if (smp_topology != NULL) {
int i, j;
cpumask_t visited;
struct cpu_group *cg;
visited = 0;
for (i = 0; i < smp_topology->ct_count; i++) {
cg = &smp_topology->ct_group[i];
if (cg->cg_mask & visited)
panic("duplicated cpumask in ct_group.");
if (cg->cg_mask == 0)
continue;
visited |= cg->cg_mask;
for (j = 0; j < MAXCPU; j++) {
if ((cg->cg_mask & (1 << j)) != 0)
cpu_sibling[j] |= cg->cg_mask;
}
}
}
#endif
mtx_lock_spin(&sched_lock);
kseq_load_add(KSEQ_SELF(), &kse0);
mtx_unlock_spin(&sched_lock);
}
/* ARGSUSED */
static void
sched_initticks(void *dummy)
{
mtx_lock_spin(&sched_lock);
realstathz = stathz ? stathz : hz;
mtx_unlock_spin(&sched_lock);
}
/*
* Very early in the boot some setup of scheduler-specific
* parts of proc0 and of some scheduler resources needs to be done.
* Called from:
* proc0_init()
*/
void
schedinit(void)
{
/*
* Set up the scheduler specific parts of proc0.
*/
proc0.p_sched = NULL; /* XXX */
thread0.td_sched = &kse0;
kse0.ts_thread = &thread0;
kse0.ts_slice = 100;
}
/*
* This is only somewhat accurate: given many threads of the same
* priority, each switches when its slice runs out, and the slices
* computed by sched_timeslice() may differ from def_timeslice.
*/
int
sched_rr_interval(void)
{
return (def_timeslice);
}
static void
sched_pctcpu_update(struct td_sched *ts)
{
/*
* Adjust counters and watermark for pctcpu calc.
*/
if (ts->ts_ltick > ticks - SCHED_CPU_TICKS) {
/*
* Shift the tick count out so that the divide doesn't
* round away our results.
*/
ts->ts_ticks <<= 10;
ts->ts_ticks = (ts->ts_ticks / (ticks - ts->ts_ftick)) *
SCHED_CPU_TICKS;
ts->ts_ticks >>= 10;
} else
ts->ts_ticks = 0;
ts->ts_ltick = ticks;
ts->ts_ftick = ts->ts_ltick - SCHED_CPU_TICKS;
}
static void
sched_thread_priority(struct thread *td, u_char prio)
{
struct td_sched *ts;
ts = td->td_sched;
mtx_assert(&sched_lock, MA_OWNED);
if (__predict_false(td->td_priority == prio))
return;
if (TD_ON_RUNQ(td)) {
/*
* If the priority has been elevated due to priority
* propagation, we may have to move ourselves to a new
* queue. We still call adjustrunqueue below in case td_sched
* needs to fix things up.
*
* XXX td_priority is never set here.
*/
if (prio < td->td_priority && ts->ts_runq != NULL &&
ts->ts_runq != ts->ts_kseq->ksq_curr) {
krunq_remove(ts->ts_runq, ts);
ts->ts_runq = ts->ts_kseq->ksq_curr;
krunq_add(ts->ts_runq, ts);
}
if (ts->ts_rqindex != prio) {
sched_rem(td);
td->td_priority = prio;
sched_add(td, SRQ_BORING);
}
} else
td->td_priority = prio;
}
/*
* Update a thread's priority when it is lent another thread's
* priority.
*/
void
sched_lend_prio(struct thread *td, u_char prio)
{
td->td_flags |= TDF_BORROWING;
sched_thread_priority(td, prio);
}
/*
* Restore a thread's priority when priority propagation is
* over. The prio argument is the minimum priority the thread
* needs to have to satisfy other possible priority lending
* requests. If the thread's regular priority is less
* important than prio, the thread will keep a priority boost
* of prio.
*/
void
sched_unlend_prio(struct thread *td, u_char prio)
{
u_char base_pri;
if (td->td_base_pri >= PRI_MIN_TIMESHARE &&
td->td_base_pri <= PRI_MAX_TIMESHARE)
base_pri = td->td_user_pri;
else
base_pri = td->td_base_pri;
if (prio >= base_pri) {
td->td_flags &= ~TDF_BORROWING;
sched_thread_priority(td, base_pri);
} else
sched_lend_prio(td, prio);
}
void
sched_prio(struct thread *td, u_char prio)
{
u_char oldprio;
if (td->td_pri_class == PRI_TIMESHARE)
prio = MIN(prio, PUSER_MAX);
/* First, update the base priority. */
td->td_base_pri = prio;
/*
* If the thread is borrowing another thread's priority, don't
* ever lower the priority.
*/
if (td->td_flags & TDF_BORROWING && td->td_priority < prio)
return;
/* Change the real priority. */
oldprio = td->td_priority;
sched_thread_priority(td, prio);
/*
* If the thread is on a turnstile, then let the turnstile update
* its state.
*/
if (TD_ON_LOCK(td) && oldprio != prio)
turnstile_adjust(td, oldprio);
}
void
sched_user_prio(struct thread *td, u_char prio)
{
u_char oldprio;
td->td_base_user_pri = prio;
if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio)
return;
oldprio = td->td_user_pri;
td->td_user_pri = prio;
if (TD_ON_UPILOCK(td) && oldprio != prio)
umtx_pi_adjust(td, oldprio);
}
void
sched_lend_user_prio(struct thread *td, u_char prio)
{
u_char oldprio;
td->td_flags |= TDF_UBORROWING;
oldprio = td->td_user_pri;
td->td_user_pri = prio;
if (TD_ON_UPILOCK(td) && oldprio != prio)
umtx_pi_adjust(td, oldprio);
}
void
sched_unlend_user_prio(struct thread *td, u_char prio)
{
u_char base_pri;
base_pri = td->td_base_user_pri;
if (prio >= base_pri) {
td->td_flags &= ~TDF_UBORROWING;
sched_user_prio(td, base_pri);
} else
sched_lend_user_prio(td, prio);
}
void
sched_switch(struct thread *td, struct thread *newtd, int flags)
{
struct kseq *ksq;
struct td_sched *ts;
uint64_t now;
mtx_assert(&sched_lock, MA_OWNED);
now = sched_timestamp();
ts = td->td_sched;
ksq = KSEQ_SELF();
td->td_lastcpu = td->td_oncpu;
td->td_oncpu = NOCPU;
td->td_flags &= ~TDF_NEEDRESCHED;
td->td_owepreempt = 0;
if (TD_IS_IDLETHREAD(td)) {
TD_SET_CAN_RUN(td);
} else {
sched_update_runtime(ts, now);
/* We are ending our run so make our slot available again */
kseq_load_rem(ksq, ts);
if (TD_IS_RUNNING(td)) {
sched_add(td, (flags & SW_PREEMPT) ?
SRQ_OURSELF|SRQ_YIELDING|SRQ_PREEMPTED :
SRQ_OURSELF|SRQ_YIELDING);
} else {
ts->ts_flags &= ~TSF_NEXTRQ;
}
}
if (newtd != NULL) {
/*
* If we bring in a thread, account for it as if it had been
* added to the run queue and then chosen.
*/
newtd->td_sched->ts_flags |= TSF_DIDRUN;
newtd->td_sched->ts_timestamp = now;
TD_SET_RUNNING(newtd);
kseq_load_add(ksq, newtd->td_sched);
} else {
newtd = choosethread();
/* sched_choose sets ts_timestamp, just reuse it */
}
if (td != newtd) {
ts->ts_lastran = tick;
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
cpu_switch(td, newtd);
#ifdef HWPMC_HOOKS
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
#endif
}
sched_lock.mtx_lock = (uintptr_t)td;
td->td_oncpu = PCPU_GET(cpuid);
}
void
sched_nice(struct proc *p, int nice)
{
struct thread *td;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_OWNED);
p->p_nice = nice;
FOREACH_THREAD_IN_PROC(p, td) {
if (td->td_pri_class == PRI_TIMESHARE) {
sched_user_prio(td, sched_calc_pri(td->td_sched));
td->td_flags |= TDF_NEEDRESCHED;
}
}
}
void
sched_sleep(struct thread *td)
{
struct td_sched *ts;
mtx_assert(&sched_lock, MA_OWNED);
ts = td->td_sched;
if (td->td_flags & TDF_SINTR)
ts->ts_activated = 0;
else
ts->ts_activated = -1;
ts->ts_flags |= TSF_SLEEP;
}
void
sched_wakeup(struct thread *td)
{
struct td_sched *ts;
struct kseq *kseq, *mykseq;
uint64_t now;
mtx_assert(&sched_lock, MA_OWNED);
ts = td->td_sched;
mykseq = KSEQ_SELF();
if (ts->ts_flags & TSF_SLEEP) {
ts->ts_flags &= ~TSF_SLEEP;
if (sched_is_timeshare(td)) {
sched_commit_runtime(ts);
now = sched_timestamp();
kseq = KSEQ_CPU(td->td_lastcpu);
#ifdef SMP
if (kseq != mykseq)
now = now - mykseq->ksq_last_timestamp +
kseq->ksq_last_timestamp;
#endif
sched_user_prio(td, sched_recalc_pri(ts, now));
}
}
sched_add(td, SRQ_BORING);
}
/*
* Penalize the parent for creating a new child and initialize the child's
* priority.
*/
void
sched_fork(struct thread *td, struct thread *childtd)
{
mtx_assert(&sched_lock, MA_OWNED);
sched_fork_thread(td, childtd);
}
void
sched_fork_thread(struct thread *td, struct thread *child)
{
struct td_sched *ts;
struct td_sched *ts2;
sched_newthread(child);
ts = td->td_sched;
ts2 = child->td_sched;
ts2->ts_slptime = ts2->ts_slptime * CHILD_WEIGHT / 100;
if (child->td_pri_class == PRI_TIMESHARE)
sched_user_prio(child, sched_calc_pri(ts2));
ts->ts_slptime = ts->ts_slptime * PARENT_WEIGHT / 100;
ts2->ts_slice = (ts->ts_slice + 1) >> 1;
ts2->ts_flags |= TSF_FIRST_SLICE | (ts->ts_flags & TSF_NEXTRQ);
ts2->ts_activated = 0;
ts->ts_slice >>= 1;
if (ts->ts_slice == 0) {
ts->ts_slice = 1;
sched_tick();
}
/* Grab our parents cpu estimation information. */
ts2->ts_ticks = ts->ts_ticks;
ts2->ts_ltick = ts->ts_ltick;
ts2->ts_ftick = ts->ts_ftick;
}
void
sched_class(struct thread *td, int class)
{
mtx_assert(&sched_lock, MA_OWNED);
td->td_pri_class = class;
}
/*
* Return some of the child's priority and interactivity to the parent.
*/
void
sched_exit(struct proc *p, struct thread *childtd)
{
mtx_assert(&sched_lock, MA_OWNED);
sched_exit_thread(FIRST_THREAD_IN_PROC(p), childtd);
}
void
sched_exit_thread(struct thread *td, struct thread *childtd)
{
struct td_sched *childke = childtd->td_sched;
struct td_sched *parentke = td->td_sched;
if (childke->ts_slptime < parentke->ts_slptime) {
parentke->ts_slptime = parentke->ts_slptime /
(EXIT_WEIGHT) * (EXIT_WEIGHT - 1) +
parentke->ts_slptime / EXIT_WEIGHT;
}
kseq_load_rem(KSEQ_SELF(), childke);
sched_update_runtime(childke, sched_timestamp());
sched_commit_runtime(childke);
if ((childke->ts_flags & TSF_FIRST_SLICE) &&
td->td_proc == childtd->td_proc->p_pptr) {
parentke->ts_slice += childke->ts_slice;
if (parentke->ts_slice > sched_timeslice(parentke))
parentke->ts_slice = sched_timeslice(parentke);
}
}
static int
sched_starving(struct kseq *ksq, unsigned now, struct td_sched *ts)
{
uint64_t delta;
if (ts->ts_proc->p_nice > ksq->ksq_expired_nice)
return (1);
if (ksq->ksq_expired_tick == 0)
return (0);
delta = HZ_TO_NS((uint64_t)now - ksq->ksq_expired_tick);
if (delta > STARVATION_TIME * ksq->ksq_load)
return (1);
return (0);
}
/*
* An interactive thread has a smaller timeslice granularity;
* a CPU hog can have a larger one.
*/
static inline int
sched_timeslice_split(struct td_sched *ts)
{
int score, g;
score = (int)(MAX_SCORE - CURRENT_SCORE(ts));
if (score == 0)
score = 1;
#ifdef SMP
g = granularity * ((1 << score) - 1) * smp_cpus;
#else
g = granularity * ((1 << score) - 1);
#endif
return (ts->ts_slice >= g && ts->ts_slice % g == 0);
}
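/*
* Example on a UP machine with hz = 1000 (granularity = 10): a thread
* at the maximum sleep score has MAX_SCORE - CURRENT_SCORE == 0, bumped
* to 1, so g = 10 * (2 - 1) = 10 and it is asked to reschedule every 10
* ticks of its slice.  A thread with a sleep score of 5 gets
* g = 10 * (2^5 - 1) = 310 ticks, more than a nice-0 thread's 105-tick
* slice, so that slice is never split.
*/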
void
sched_tick(void)
{
struct thread *td;
struct proc *p;
struct td_sched *ts;
struct kseq *kseq;
uint64_t now;
int cpuid;
int class;
mtx_assert(&sched_lock, MA_OWNED);
td = curthread;
ts = td->td_sched;
p = td->td_proc;
class = PRI_BASE(td->td_pri_class);
now = sched_timestamp();
cpuid = PCPU_GET(cpuid);
kseq = KSEQ_CPU(cpuid);
kseq->ksq_last_timestamp = now;
if (class == PRI_IDLE) {
/*
* Processes of equal idle priority are run round-robin.
*/
if (!TD_IS_IDLETHREAD(td) && --ts->ts_slice <= 0) {
ts->ts_slice = def_timeslice;
td->td_flags |= TDF_NEEDRESCHED;
}
return;
}
if (class == PRI_REALTIME) {
/*
* Realtime scheduling: do round-robin for the RR class; FIFO
* is not affected.
*/
if (PRI_NEED_RR(td->td_pri_class) && --ts->ts_slice <= 0) {
ts->ts_slice = def_timeslice;
td->td_flags |= TDF_NEEDRESCHED;
}
return;
}
/*
* Skip kernel threads, even though they may be classified as TIMESHARE.
*/
if (class != PRI_TIMESHARE || (p->p_flag & P_KTHREAD) != 0)
return;
if (--ts->ts_slice <= 0) {
td->td_flags |= TDF_NEEDRESCHED;
sched_update_runtime(ts, now);
sched_commit_runtime(ts);
sched_user_prio(td, sched_calc_pri(ts));
ts->ts_slice = sched_timeslice(ts);
ts->ts_flags &= ~TSF_FIRST_SLICE;
if (ts->ts_flags & TSF_BOUND || td->td_pinned) {
if (kseq->ksq_expired_tick == 0)
kseq->ksq_expired_tick = tick;
} else {
if (kseq_global.ksq_expired_tick == 0)
kseq_global.ksq_expired_tick = tick;
}
if (!THREAD_IS_INTERACTIVE(ts) ||
sched_starving(kseq, tick, ts) ||
sched_starving(&kseq_global, tick, ts)) {
/* The thread has become a CPU hog; schedule it off. */
ts->ts_flags |= TSF_NEXTRQ;
if (ts->ts_flags & TSF_BOUND || td->td_pinned) {
if (p->p_nice < kseq->ksq_expired_nice)
kseq->ksq_expired_nice = p->p_nice;
} else {
if (p->p_nice < kseq_global.ksq_expired_nice)
kseq_global.ksq_expired_nice =
p->p_nice;
}
}
} else {
/*
* Don't allow an interactive thread with a long timeslice
* to monopolize the CPU; split the long timeslice into small
* chunks. This essentially does round-robin between
* interactive threads.
*/
if (THREAD_IS_INTERACTIVE(ts) && sched_timeslice_split(ts))
td->td_flags |= TDF_NEEDRESCHED;
}
}
void
sched_clock(struct thread *td)
{
struct td_sched *ts;
mtx_assert(&sched_lock, MA_OWNED);
ts = td->td_sched;
/* Adjust ticks for pctcpu */
ts->ts_ticks++;
ts->ts_ltick = ticks;
/* Go up to one second beyond our max and then trim back down */
if (ts->ts_ftick + SCHED_CPU_TICKS + hz < ts->ts_ltick)
sched_pctcpu_update(ts);
}
static int
kseq_runnable(struct kseq *kseq)
{
return (krunq_check(kseq->ksq_curr) ||
krunq_check(kseq->ksq_next) ||
krunq_check(&kseq->ksq_idle));
}
int
sched_runnable(void)
{
#ifdef SMP
return (kseq_runnable(&kseq_global) || kseq_runnable(KSEQ_SELF()));
#else
return (kseq_runnable(&kseq_global));
#endif
}
void
sched_userret(struct thread *td)
{
KASSERT((td->td_flags & TDF_BORROWING) == 0,
("thread with borrowed priority returning to userland"));
if (td->td_priority != td->td_user_pri) {
mtx_lock_spin(&sched_lock);
td->td_priority = td->td_user_pri;
td->td_base_pri = td->td_user_pri;
mtx_unlock_spin(&sched_lock);
}
}
struct thread *
sched_choose(void)
{
struct td_sched *ts;
struct kseq *kseq;
#ifdef SMP
struct td_sched *kecpu;
mtx_assert(&sched_lock, MA_OWNED);
kseq = &kseq_global;
ts = kseq_choose(&kseq_global);
kecpu = kseq_choose(KSEQ_SELF());
if (ts == NULL ||
(kecpu != NULL &&
kecpu->ts_thread->td_priority < ts->ts_thread->td_priority)) {
ts = kecpu;
kseq = KSEQ_SELF();
}
#else
kseq = &kseq_global;
ts = kseq_choose(kseq);
#endif
if (ts != NULL) {
kseq_runq_rem(kseq, ts);
ts->ts_flags &= ~TSF_PREEMPTED;
ts->ts_timestamp = sched_timestamp();
return (ts->ts_thread);
}
return (PCPU_GET(idlethread));
}
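/*
* On SMP, sched_choose() looks at two run queue sets: the global kseq,
* which any CPU may service, and the per-CPU kseq holding pinned and
* bound threads.  Whichever offers the numerically lower (better)
* priority wins; if both are empty the per-CPU idle thread is returned.
*/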
#ifdef SMP
static int
forward_wakeup(int cpunum, cpumask_t me)
{
cpumask_t map, dontuse;
cpumask_t map2;
struct pcpu *pc;
cpumask_t id, map3;
mtx_assert(&sched_lock, MA_OWNED);
CTR0(KTR_RUNQ, "forward_wakeup()");
if ((!forward_wakeup_enabled) ||
(forward_wakeup_use_mask == 0 && forward_wakeup_use_loop == 0))
return (0);
if (!smp_started || cold || panicstr)
return (0);
forward_wakeups_requested++;
/*
* check the idle mask we received against what we calculated before
* in the old version.
*/
/*
* don't bother if we should be doing it ourselves.
*/
if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
return (0);
dontuse = me | stopped_cpus | hlt_cpus_mask;
map3 = 0;
if (forward_wakeup_use_loop) {
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
id = pc->pc_cpumask;
if ( (id & dontuse) == 0 &&
pc->pc_curthread == pc->pc_idlethread) {
map3 |= id;
}
}
}
if (forward_wakeup_use_mask) {
map = 0;
map = idle_cpus_mask & ~dontuse;
/* If they are both on, compare and use loop if different */
if (forward_wakeup_use_loop) {
if (map != map3) {
printf("map (%02X) != map3 (%02X)\n",
map, map3);
map = map3;
}
}
} else {
map = map3;
}
/* If we only allow a specific CPU, then mask off all the others */
if (cpunum != NOCPU) {
KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
map &= (1 << cpunum);
} else {
/* Try to choose an idle die. */
if (forward_wakeup_use_htt) {
map2 = (map & (map >> 1)) & 0x5555;
if (map2) {
map = map2;
}
}
/* set only one bit: map & (~map + 1) == map & -map isolates the lowest set bit */
if (forward_wakeup_use_single) {
map = map & ((~map) + 1);
}
}
if (map) {
forward_wakeups_delivered++;
ipi_selected(map, IPI_AST);
return (1);
}
return (0);
}
#endif
void
sched_add(struct thread *td, int flags)
{
struct kseq *ksq;
struct td_sched *ts;
struct thread *mytd;
int class;
int nextrq;
int need_resched = 0;
#ifdef SMP
int cpu;
int mycpu;
int pinned;
struct kseq *myksq;
#endif
mtx_assert(&sched_lock, MA_OWNED);
CTR5(KTR_SCHED, "sched_add: %p(%s) prio %d by %p(%s)",
td, td->td_proc->p_comm, td->td_priority, curthread,
curthread->td_proc->p_comm);
KASSERT((td->td_inhibitors == 0),
("sched_add: trying to run inhibited thread"));
KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
("sched_add: bad thread state"));
TD_SET_RUNQ(td);
mytd = curthread;
ts = td->td_sched;
KASSERT(ts->ts_proc->p_sflag & PS_INMEM,
("sched_add: process swapped out"));
KASSERT(ts->ts_runq == NULL,
("sched_add: KSE %p is still assigned to a run queue", ts));
class = PRI_BASE(td->td_pri_class);
#ifdef SMP
mycpu = PCPU_GET(cpuid);
myksq = KSEQ_CPU(mycpu);
ts->ts_wakeup_cpu = mycpu;
#endif
nextrq = (ts->ts_flags & TSF_NEXTRQ);
ts->ts_flags &= ~TSF_NEXTRQ;
if (flags & SRQ_PREEMPTED)
ts->ts_flags |= TSF_PREEMPTED;
ksq = &kseq_global;
#ifdef SMP
if (td->td_pinned != 0) {
cpu = td->td_lastcpu;
ksq = KSEQ_CPU(cpu);
pinned = 1;
} else if ((ts)->ts_flags & TSF_BOUND) {
cpu = ts->ts_cpu;
ksq = KSEQ_CPU(cpu);
pinned = 1;
} else {
pinned = 0;
cpu = NOCPU;
}
#endif
switch (class) {
case PRI_ITHD:
case PRI_REALTIME:
ts->ts_runq = ksq->ksq_curr;
break;
case PRI_TIMESHARE:
if ((td->td_flags & TDF_BORROWING) == 0 && nextrq)
ts->ts_runq = ksq->ksq_next;
else
ts->ts_runq = ksq->ksq_curr;
break;
case PRI_IDLE:
/*
* This is for priority propagation.
*/
if (td->td_priority < PRI_MIN_IDLE)
ts->ts_runq = ksq->ksq_curr;
else
ts->ts_runq = &ksq->ksq_idle;
break;
default:
panic("Unknown pri class.");
break;
}
#ifdef SMP
if ((ts->ts_runq == kseq_global.ksq_curr ||
ts->ts_runq == myksq->ksq_curr) &&
td->td_priority < mytd->td_priority) {
#else
if (ts->ts_runq == kseq_global.ksq_curr &&
td->td_priority < mytd->td_priority) {
#endif
struct krunq *rq;
rq = ts->ts_runq;
ts->ts_runq = NULL;
if ((flags & SRQ_YIELDING) == 0 && maybe_preempt(td))
return;
ts->ts_runq = rq;
need_resched = TDF_NEEDRESCHED;
}
kseq_runq_add(ksq, ts);
kseq_load_add(ksq, ts);
#ifdef SMP
if (pinned) {
if (cpu != mycpu) {
struct thread *running = pcpu_find(cpu)->pc_curthread;
if (ksq->ksq_curr == ts->ts_runq &&
running->td_priority < td->td_priority) {
if (td->td_priority <= PRI_MAX_ITHD)
ipi_selected(1 << cpu, IPI_PREEMPT);
else {
running->td_flags |= TDF_NEEDRESCHED;
ipi_selected(1 << cpu, IPI_AST);
}
}
} else
curthread->td_flags |= need_resched;
} else {
cpumask_t me = 1 << mycpu;
cpumask_t idle = idle_cpus_mask & me;
int forwarded = 0;
if (!idle && ((flags & SRQ_INTR) == 0) &&
(idle_cpus_mask & ~(hlt_cpus_mask | me)))
forwarded = forward_wakeup(cpu, me);
if (forwarded == 0)
curthread->td_flags |= need_resched;
}
#else
mytd->td_flags |= need_resched;
#endif
}
void
sched_rem(struct thread *td)
{
struct kseq *kseq;
struct td_sched *ts;
mtx_assert(&sched_lock, MA_OWNED);
ts = td->td_sched;
KASSERT(TD_ON_RUNQ(td),
("sched_rem: KSE not on run queue"));
kseq = ts->ts_kseq;
kseq_runq_rem(kseq, ts);
kseq_load_rem(kseq, ts);
TD_SET_CAN_RUN(td);
}
fixpt_t
sched_pctcpu(struct thread *td)
{
fixpt_t pctcpu;
struct td_sched *ts;
pctcpu = 0;
ts = td->td_sched;
if (ts == NULL)
return (0);
mtx_lock_spin(&sched_lock);
if (ts->ts_ticks) {
int rtick;
/*
* Don't update more frequently than twice a second. Allowing
* this causes the cpu usage to decay away too quickly due to
* rounding errors.
*/
if (ts->ts_ftick + SCHED_CPU_TICKS < ts->ts_ltick ||
ts->ts_ltick < (ticks - (hz / 2)))
sched_pctcpu_update(ts);
/* How many rticks per second? */
rtick = MIN(ts->ts_ticks / SCHED_CPU_TIME, SCHED_CPU_TICKS);
pctcpu = (FSCALE * ((FSCALE * rtick)/realstathz)) >> FSHIFT;
}
ts->ts_proc->p_swtime = ts->ts_ltick - ts->ts_ftick;
mtx_unlock_spin(&sched_lock);
return (pctcpu);
}
void
sched_bind(struct thread *td, int cpu)
{
struct td_sched *ts;
mtx_assert(&sched_lock, MA_OWNED);
ts = td->td_sched;
ts->ts_flags |= TSF_BOUND;
#ifdef SMP
ts->ts_cpu = cpu;
if (PCPU_GET(cpuid) == cpu)
return;
mi_switch(SW_VOL, NULL);
#endif
}
void
sched_unbind(struct thread *td)
{
mtx_assert(&sched_lock, MA_OWNED);
td->td_sched->ts_flags &= ~TSF_BOUND;
}
int
sched_is_bound(struct thread *td)
{
mtx_assert(&sched_lock, MA_OWNED);
return (td->td_sched->ts_flags & TSF_BOUND);
}
int
sched_load(void)
{
return (sched_tdcnt);
}
void
sched_relinquish(struct thread *td)
{
mtx_lock_spin(&sched_lock);
if (sched_is_timeshare(td)) {
sched_prio(td, PRI_MAX_TIMESHARE);
td->td_sched->ts_flags |= TSF_NEXTRQ;
}
mi_switch(SW_VOL, NULL);
mtx_unlock_spin(&sched_lock);
}
int
sched_sizeof_proc(void)
{
return (sizeof(struct proc));
}
int
sched_sizeof_thread(void)
{
return (sizeof(struct thread) + sizeof(struct td_sched));
}
/*
* The actual idle process.
*/
void
sched_idletd(void *dummy)
{
struct proc *p;
struct thread *td;
#ifdef SMP
cpumask_t mycpu;
#endif
td = curthread;
p = td->td_proc;
#ifdef SMP
mycpu = PCPU_GET(cpumask);
mtx_lock_spin(&sched_lock);
idle_cpus_mask |= mycpu;
mtx_unlock_spin(&sched_lock);
#endif
for (;;) {
mtx_assert(&Giant, MA_NOTOWNED);
while (sched_runnable() == 0)
cpu_idle();
mtx_lock_spin(&sched_lock);
#ifdef SMP
idle_cpus_mask &= ~mycpu;
#endif
mi_switch(SW_VOL, NULL);
#ifdef SMP
idle_cpus_mask |= mycpu;
#endif
mtx_unlock_spin(&sched_lock);
}
}
#define KERN_SWITCH_INCLUDE 1
#include "kern/kern_switch.c"