1994-05-24 10:09:53 +00:00
|
|
|
/*-
|
2017-11-20 19:43:44 +00:00
|
|
|
* SPDX-License-Identifier: BSD-3-Clause
|
|
|
|
*
|
1994-05-24 10:09:53 +00:00
|
|
|
* Copyright (c) 1982, 1986, 1990, 1991, 1993
|
|
|
|
* The Regents of the University of California. All rights reserved.
|
|
|
|
* (c) UNIX System Laboratories, Inc.
|
|
|
|
* All or some portions of this file are derived from material licensed
|
|
|
|
* to the University of California by American Telephone and Telegraph
|
|
|
|
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
|
|
|
|
* the permission of UNIX System Laboratories, Inc.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2016-09-15 13:16:20 +00:00
|
|
|
* 3. Neither the name of the University nor the names of its contributors
|
1994-05-24 10:09:53 +00:00
|
|
|
* may be used to endorse or promote products derived from this software
|
|
|
|
* without specific prior written permission.
|
|
|
|
*
|
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*
|
1996-03-11 05:48:57 +00:00
|
|
|
* @(#)kern_synch.c 8.9 (Berkeley) 5/19/95
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
|
2003-06-11 00:56:59 +00:00
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
1996-01-03 21:42:35 +00:00
|
|
|
#include "opt_ktrace.h"
|
2008-04-17 04:20:10 +00:00
|
|
|
#include "opt_sched.h"
|
1996-01-03 21:42:35 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
2020-02-28 16:05:18 +00:00
|
|
|
#include <sys/blockcount.h>
|
2001-03-28 11:52:56 +00:00
|
|
|
#include <sys/condvar.h>
|
2004-07-10 21:36:01 +00:00
|
|
|
#include <sys/kdb.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/kernel.h>
|
2000-09-07 01:33:02 +00:00
|
|
|
#include <sys/ktr.h>
|
2000-12-06 00:33:58 +00:00
|
|
|
#include <sys/lock.h>
|
2000-10-20 07:52:10 +00:00
|
|
|
#include <sys/mutex.h>
|
2001-03-28 11:52:56 +00:00
|
|
|
#include <sys/proc.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/resourcevar.h>
|
2002-10-12 05:32:24 +00:00
|
|
|
#include <sys/sched.h>
|
2012-05-15 01:30:25 +00:00
|
|
|
#include <sys/sdt.h>
|
2001-03-28 11:52:56 +00:00
|
|
|
#include <sys/signalvar.h>
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep queues in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleeps no longer inherently bump the priority. Instead, this is solely
a property of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
#include <sys/sleepqueue.h>
|
2001-04-27 19:28:25 +00:00
|
|
|
#include <sys/smp.h>
|
2001-03-28 11:52:56 +00:00
|
|
|
#include <sys/sx.h>
|
1997-08-08 22:48:57 +00:00
|
|
|
#include <sys/sysctl.h>
|
2000-12-02 05:41:30 +00:00
|
|
|
#include <sys/sysproto.h>
|
2001-03-28 11:52:56 +00:00
|
|
|
#include <sys/vmmeter.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
1998-03-28 10:33:27 +00:00
|
|
|
#include <sys/uio.h>
|
1994-05-24 10:09:53 +00:00
|
|
|
#include <sys/ktrace.h>
|
|
|
|
#endif
|
2019-10-15 21:32:38 +00:00
|
|
|
#ifdef EPOCH_TRACE
|
|
|
|
#include <sys/epoch.h>
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
|
|
|
|
#include <machine/cpu.h>
|
|
|
|
|
2004-01-25 07:49:45 +00:00
|
|
|
static void synch_setup(void *dummy);
|
2008-03-16 10:58:09 +00:00
|
|
|
SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
|
|
|
|
NULL);
|
1995-08-28 09:19:25 +00:00
|
|
|
|
1999-02-22 16:57:48 +00:00
|
|
|
/*
 * Given its default value, (hz / 10) * 2, by sleepinit() during boot.
 * NOTE(review): the consumers of this variable are outside this part of
 * the file -- confirm its exact meaning there.
 */
int hogticks;
|
2019-12-24 16:19:33 +00:00
|
|
|
/*
 * Wait channels reserved for pausing.  Only the addresses of the elements
 * matter: _sleep() selects SLEEPQ_PAUSE instead of SLEEPQ_SLEEP whenever
 * the wait channel it is given points into this array.
 */
static const char pause_wchan[MAXCPU];
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-02-01 05:37:36 +00:00
|
|
|
static struct callout loadav_callout;
|
2000-11-27 22:52:31 +00:00
|
|
|
|
2001-10-20 13:10:43 +00:00
|
|
|
struct loadavg averunnable =
|
|
|
|
{ {0, 0, 0}, FSCALE }; /* load average, of runnable procs */
|
|
|
|
/*
|
|
|
|
* Constants for averages over 1, 5, and 15 minutes
|
|
|
|
* when sampling at 5 second intervals.
|
|
|
|
*/
|
|
|
|
static fixpt_t cexp[3] = {
|
|
|
|
0.9200444146293232 * FSCALE, /* exp(-1/12) */
|
|
|
|
0.9834714538216174 * FSCALE, /* exp(-1/60) */
|
|
|
|
0.9944598480048967 * FSCALE, /* exp(-1/180) */
|
|
|
|
};
|
|
|
|
|
2002-11-21 08:57:08 +00:00
|
|
|
/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
|
2020-03-02 15:30:52 +00:00
|
|
|
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE,
|
|
|
|
"Fixed-point scale factor used for calculating load average values");
|
2002-11-21 08:57:08 +00:00
|
|
|
|
2004-02-01 05:37:36 +00:00
|
|
|
static void loadav(void *arg);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2012-05-15 01:30:25 +00:00
|
|
|
SDT_PROVIDER_DECLARE(sched);
|
2013-11-26 08:46:27 +00:00
|
|
|
SDT_PROBE_DEFINE(sched, , , preempt);
|
2012-05-15 01:30:25 +00:00
|
|
|
|
2013-08-09 23:13:52 +00:00
|
|
|
static void
|
|
|
|
sleepinit(void *unused)
|
1996-07-31 09:26:54 +00:00
|
|
|
{
|
|
|
|
|
2002-10-12 05:32:24 +00:00
|
|
|
hogticks = (hz / 10) * 2; /* Default only. */
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
init_sleepqueues();
|
1996-07-31 09:26:54 +00:00
|
|
|
}
|
|
|
|
|
2013-08-09 23:13:52 +00:00
|
|
|
/*
|
|
|
|
* vmem tries to lock the sleepq mutexes when freeing kva, so make sure
|
|
|
|
* it is available.
|
|
|
|
*/
|
2018-05-18 17:58:09 +00:00
|
|
|
SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, NULL);
|
2013-08-09 23:13:52 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2006-02-22 20:46:10 +00:00
|
|
|
* General sleep call. Suspends the current thread until a wakeup is
|
|
|
|
* performed on the specified identifier. The thread will then be made
|
2013-06-28 21:04:15 +00:00
|
|
|
* runnable with the specified priority. Sleeps at most sbt units of time
|
2013-01-05 00:23:26 +00:00
|
|
|
* (0 means no timeout). If pri includes the PCATCH flag, let signals
|
|
|
|
* interrupt the sleep, otherwise ignore them while sleeping. Returns 0 if
|
1994-05-24 10:09:53 +00:00
|
|
|
* awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a
|
2013-01-05 00:23:26 +00:00
|
|
|
* signal becomes pending, ERESTART is returned if the current system
|
1994-05-24 10:09:53 +00:00
|
|
|
* call should be restarted if possible, and EINTR is returned if the system
|
|
|
|
* call should be interrupted by the signal (return EINTR).
|
2000-09-11 00:20:02 +00:00
|
|
|
*
|
2007-03-09 22:41:01 +00:00
|
|
|
* The lock argument is unlocked before the caller is suspended, and
|
|
|
|
* re-locked before _sleep() returns. If priority includes the PDROP
|
|
|
|
* flag the lock is not re-locked before returning.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
int
|
2019-12-24 16:19:33 +00:00
|
|
|
_sleep(const void *ident, struct lock_object *lock, int priority,
|
2013-03-04 12:48:41 +00:00
|
|
|
const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
struct thread *td;
|
2007-03-09 22:41:01 +00:00
|
|
|
struct lock_class *class;
|
2013-09-20 23:06:21 +00:00
|
|
|
uintptr_t lock_state;
|
|
|
|
int catch, pri, rval, sleepq_flags;
|
2007-03-09 22:41:01 +00:00
|
|
|
WITNESS_SAVE_DECL(lock_witness);
|
1994-05-24 10:09:53 +00:00
|
|
|
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
td = curthread;
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
2002-06-07 05:39:16 +00:00
|
|
|
if (KTRPOINT(td, KTR_CSW))
|
2012-04-20 15:32:36 +00:00
|
|
|
ktrcsw(1, 0, wmesg);
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
2007-03-09 22:41:01 +00:00
|
|
|
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
|
|
|
|
"Sleeping on \"%s\"", wmesg);
|
2013-03-04 12:48:41 +00:00
|
|
|
KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL,
|
2008-08-20 12:20:22 +00:00
|
|
|
("sleeping without a lock"));
|
2017-01-16 20:34:42 +00:00
|
|
|
KASSERT(ident != NULL, ("_sleep: NULL ident"));
|
|
|
|
KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running"));
|
2008-08-07 21:00:13 +00:00
|
|
|
if (priority & PDROP)
|
|
|
|
KASSERT(lock != NULL && lock != &Giant.lock_object,
|
|
|
|
("PDROP requires a non-Giant lock"));
|
2007-03-09 22:41:01 +00:00
|
|
|
if (lock != NULL)
|
|
|
|
class = LOCK_CLASS(lock);
|
|
|
|
else
|
|
|
|
class = NULL;
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
|
2017-02-17 06:45:04 +00:00
|
|
|
if (SCHEDULER_STOPPED_TD(td)) {
|
2007-03-09 22:41:01 +00:00
|
|
|
if (lock != NULL && priority & PDROP)
|
|
|
|
class->lc_unlock(lock);
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
catch = priority & PCATCH;
|
2008-03-12 06:31:06 +00:00
|
|
|
pri = priority & PRIMASK;
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
|
2016-11-02 20:57:20 +00:00
|
|
|
KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep"));
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
|
2019-12-24 06:08:29 +00:00
|
|
|
if ((uintptr_t)ident >= (uintptr_t)&pause_wchan[0] &&
|
|
|
|
(uintptr_t)ident <= (uintptr_t)&pause_wchan[MAXCPU - 1])
|
2013-03-04 12:48:41 +00:00
|
|
|
sleepq_flags = SLEEPQ_PAUSE;
|
2007-02-23 16:22:09 +00:00
|
|
|
else
|
2013-03-04 12:48:41 +00:00
|
|
|
sleepq_flags = SLEEPQ_SLEEP;
|
2006-02-23 00:13:58 +00:00
|
|
|
if (catch)
|
2013-03-04 12:48:41 +00:00
|
|
|
sleepq_flags |= SLEEPQ_INTERRUPTIBLE;
|
2006-02-23 00:13:58 +00:00
|
|
|
|
2004-10-12 18:36:20 +00:00
|
|
|
sleepq_lock(ident);
|
2007-03-09 22:41:01 +00:00
|
|
|
CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)",
|
2017-12-25 04:48:39 +00:00
|
|
|
td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident);
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
|
2008-09-25 13:42:19 +00:00
|
|
|
if (lock == &Giant.lock_object)
|
|
|
|
mtx_assert(&Giant, MA_OWNED);
|
Change the preemption code for software interrupt thread schedules and
mutex releases to not require flags for the cases when preemption is
not allowed:
The purpose of the MTX_NOSWITCH and SWI_NOSWITCH flags is to prevent
switching to a higher priority thread on mutex releease and swi schedule,
respectively when that switch is not safe. Now that the critical section
API maintains a per-thread nesting count, the kernel can easily check
whether or not it should switch without relying on flags from the
programmer. This fixes a few bugs in that all current callers of
swi_sched() used SWI_NOSWITCH, when in fact, only the ones called from
fast interrupt handlers and the swi_sched of softclock needed this flag.
Note that to ensure that swi_sched()'s in clock and fast interrupt
handlers do not switch, these handlers have to be explicitly wrapped
in critical_enter/exit pairs. Presently, just wrapping the handlers is
sufficient, but in the future with the fully preemptive kernel, the
interrupt must be EOI'd before critical_exit() is called. (critical_exit()
can switch due to a deferred preemption in a fully preemptive kernel.)
I've tested the changes to the interrupt code on i386 and alpha. I have
not tested ia64, but the interrupt code is almost identical to the alpha
code, so I expect it will work fine. PowerPC and ARM do not yet have
interrupt code in the tree so they shouldn't be broken. Sparc64 is
broken, but that's been ok'd by jake and tmm who will be fixing the
interrupt code for sparc64 shortly.
Reviewed by: peter
Tested on: i386, alpha
2002-01-05 08:47:13 +00:00
|
|
|
DROP_GIANT();
|
2008-08-07 21:00:13 +00:00
|
|
|
if (lock != NULL && lock != &Giant.lock_object &&
|
|
|
|
!(class->lc_flags & LC_SLEEPABLE)) {
|
2007-03-09 22:41:01 +00:00
|
|
|
WITNESS_SAVE(lock, lock_witness);
|
|
|
|
lock_state = class->lc_unlock(lock);
|
|
|
|
} else
|
|
|
|
/* GCC needs to follow the Yellow Brick Road */
|
|
|
|
lock_state = -1;
|
2001-09-12 08:38:13 +00:00
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* We put ourselves on the sleep queue and start our timeout
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
* before calling thread_suspend_check, as we could stop there,
|
|
|
|
* and a wakeup or a SIGCONT (or both) could occur while we were
|
|
|
|
* stopped without resuming us. Thus, we must be ready for sleep
|
|
|
|
* when cursig() is called. If the wakeup happens while we're
|
|
|
|
* stopped, then td will no longer be on a sleep queue upon
|
|
|
|
* return from cursig().
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2013-03-04 12:48:41 +00:00
|
|
|
sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
|
2015-01-22 11:12:42 +00:00
|
|
|
if (sbt != 0)
|
|
|
|
sleepq_set_timeout_sbt(ident, sbt, pr, flags);
|
2007-05-08 21:49:59 +00:00
|
|
|
if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
|
|
|
|
sleepq_release(ident);
|
|
|
|
WITNESS_SAVE(lock, lock_witness);
|
|
|
|
lock_state = class->lc_unlock(lock);
|
|
|
|
sleepq_lock(ident);
|
|
|
|
}
|
2013-03-04 12:48:41 +00:00
|
|
|
if (sbt != 0 && catch)
|
2008-03-12 06:31:06 +00:00
|
|
|
rval = sleepq_timedwait_sig(ident, pri);
|
2013-03-04 12:48:41 +00:00
|
|
|
else if (sbt != 0)
|
2008-03-12 06:31:06 +00:00
|
|
|
rval = sleepq_timedwait(ident, pri);
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
else if (catch)
|
2008-03-12 06:31:06 +00:00
|
|
|
rval = sleepq_wait_sig(ident, pri);
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
else {
|
2008-03-12 06:31:06 +00:00
|
|
|
sleepq_wait(ident, pri);
|
Switch the sleep/wakeup and condition variable implementations to use the
sleep queue interface:
- Sleep queues attempt to merge some of the benefits of both sleep queues
and condition variables. Having sleep qeueus in a hash table avoids
having to allocate a queue head for each wait channel. Thus, struct cv
has shrunk down to just a single char * pointer now. However, the
hash table does not hold threads directly, but queue heads. This means
that once you have located a queue in the hash bucket, you no longer have
to walk the rest of the hash chain looking for threads. Instead, you have
a list of all the threads sleeping on that wait channel.
- Outside of the sleepq code and the sleep/cv code the kernel no longer
differentiates between cv's and sleep/wakeup. For example, calls to
abortsleep() and cv_abort() are replaced with a call to sleepq_abort().
Thus, the TDF_CVWAITQ flag is removed. Also, calls to unsleep() and
cv_waitq_remove() have been replaced with calls to sleepq_remove().
- The sched_sleep() function no longer accepts a priority argument as
sleep's no longer inherently bump the priority. Instead, this is soley
a propery of msleep() which explicitly calls sched_prio() before
blocking.
- The TDF_ONSLEEPQ flag has been dropped as it was never used. The
associated TDF_SET_ONSLEEPQ and TDF_CLR_ON_SLEEPQ macros have also been
dropped and replaced with a single explicit clearing of td_wchan.
TD_SET_ONSLEEPQ() would really have only made sense if it had taken
the wait channel and message as arguments anyway. Now that that only
happens in one place, a macro would be overkill.
2004-02-27 18:52:44 +00:00
|
|
|
rval = 0;
|
2001-06-22 23:11:26 +00:00
|
|
|
}
|
1994-05-24 10:09:53 +00:00
|
|
|
#ifdef KTRACE
|
2002-06-07 05:39:16 +00:00
|
|
|
if (KTRPOINT(td, KTR_CSW))
|
2012-04-20 15:32:36 +00:00
|
|
|
ktrcsw(0, 0, wmesg);
|
1994-05-24 10:09:53 +00:00
|
|
|
#endif
|
2002-06-07 05:39:16 +00:00
|
|
|
PICKUP_GIANT();
|
2008-08-07 21:00:13 +00:00
|
|
|
if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) {
|
2007-03-09 22:41:01 +00:00
|
|
|
class->lc_lock(lock, lock_state);
|
|
|
|
WITNESS_RESTORE(lock, lock_witness);
|
2000-09-11 00:20:02 +00:00
|
|
|
}
|
2000-09-07 01:33:02 +00:00
|
|
|
return (rval);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
2005-12-29 20:57:45 +00:00
|
|
|
int
|
2019-12-24 16:19:33 +00:00
|
|
|
msleep_spin_sbt(const void *ident, struct mtx *mtx, const char *wmesg,
|
2013-03-04 12:48:41 +00:00
|
|
|
sbintime_t sbt, sbintime_t pr, int flags)
|
2005-12-29 20:57:45 +00:00
|
|
|
{
|
|
|
|
struct thread *td;
|
|
|
|
int rval;
|
|
|
|
WITNESS_SAVE_DECL(mtx);
|
|
|
|
|
|
|
|
td = curthread;
|
|
|
|
KASSERT(mtx != NULL, ("sleeping without a mutex"));
|
2017-01-16 20:34:42 +00:00
|
|
|
KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident"));
|
|
|
|
KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running"));
|
2005-12-29 20:57:45 +00:00
|
|
|
|
2017-02-17 06:45:04 +00:00
|
|
|
if (SCHEDULER_STOPPED_TD(td))
|
2005-12-29 20:57:45 +00:00
|
|
|
return (0);
|
|
|
|
|
|
|
|
sleepq_lock(ident);
|
2007-02-27 18:46:07 +00:00
|
|
|
CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)",
|
2017-12-25 04:48:39 +00:00
|
|
|
td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident);
|
2005-12-29 20:57:45 +00:00
|
|
|
|
|
|
|
DROP_GIANT();
|
|
|
|
mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
|
2007-03-21 21:20:51 +00:00
|
|
|
WITNESS_SAVE(&mtx->lock_object, mtx);
|
2005-12-29 20:57:45 +00:00
|
|
|
mtx_unlock_spin(mtx);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We put ourselves on the sleep queue and start our timeout.
|
|
|
|
*/
|
2007-03-21 21:20:51 +00:00
|
|
|
sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
|
2015-01-22 11:12:42 +00:00
|
|
|
if (sbt != 0)
|
2013-03-04 12:48:41 +00:00
|
|
|
sleepq_set_timeout_sbt(ident, sbt, pr, flags);
|
2005-12-29 20:57:45 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Can't call ktrace with any spin locks held so it can lock the
|
|
|
|
* ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
|
|
|
|
* any spin lock. Thus, we have to drop the sleepq spin lock while
|
|
|
|
* we handle those requests. This is safe since we have placed our
|
|
|
|
* thread on the sleep queue already.
|
|
|
|
*/
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_CSW)) {
|
|
|
|
sleepq_release(ident);
|
2012-04-20 15:32:36 +00:00
|
|
|
ktrcsw(1, 0, wmesg);
|
2005-12-29 20:57:45 +00:00
|
|
|
sleepq_lock(ident);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
#ifdef WITNESS
|
|
|
|
sleepq_release(ident);
|
|
|
|
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"",
|
|
|
|
wmesg);
|
|
|
|
sleepq_lock(ident);
|
|
|
|
#endif
|
2013-03-04 12:48:41 +00:00
|
|
|
if (sbt != 0)
|
2008-03-12 06:31:06 +00:00
|
|
|
rval = sleepq_timedwait(ident, 0);
|
2005-12-29 20:57:45 +00:00
|
|
|
else {
|
2008-03-12 06:31:06 +00:00
|
|
|
sleepq_wait(ident, 0);
|
2005-12-29 20:57:45 +00:00
|
|
|
rval = 0;
|
|
|
|
}
|
|
|
|
#ifdef KTRACE
|
|
|
|
if (KTRPOINT(td, KTR_CSW))
|
2012-04-20 15:32:36 +00:00
|
|
|
ktrcsw(0, 0, wmesg);
|
2005-12-29 20:57:45 +00:00
|
|
|
#endif
|
|
|
|
PICKUP_GIANT();
|
|
|
|
mtx_lock_spin(mtx);
|
2007-03-21 21:20:51 +00:00
|
|
|
WITNESS_RESTORE(&mtx->lock_object, mtx);
|
2005-12-29 20:57:45 +00:00
|
|
|
return (rval);
|
|
|
|
}
|
|
|
|
|
2007-02-23 16:22:09 +00:00
|
|
|
/*
|
2018-03-03 18:36:38 +00:00
|
|
|
* pause_sbt() delays the calling thread by the given signed binary
|
|
|
|
* time. During cold bootup, pause_sbt() uses the DELAY() function
|
|
|
|
* instead of the _sleep() function to do the waiting. The "sbt"
|
|
|
|
* argument must be greater than or equal to zero. A "sbt" value of
|
|
|
|
* zero is equivalent to a "sbt" value of one tick.
|
2007-02-23 16:22:09 +00:00
|
|
|
*/
|
|
|
|
int
|
2013-03-04 12:48:41 +00:00
|
|
|
pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
|
2007-02-23 16:22:09 +00:00
|
|
|
{
|
2018-03-03 18:36:38 +00:00
|
|
|
KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0"));
|
2007-02-23 16:22:09 +00:00
|
|
|
|
2011-11-20 08:29:23 +00:00
|
|
|
/* silently convert invalid timeouts */
|
2013-03-04 12:48:41 +00:00
|
|
|
if (sbt == 0)
|
|
|
|
sbt = tick_sbt;
|
2011-11-19 11:17:27 +00:00
|
|
|
|
2016-12-20 19:44:44 +00:00
|
|
|
if ((cold && curthread == &thread0) || kdb_active ||
|
|
|
|
SCHEDULER_STOPPED()) {
|
2011-11-19 11:17:27 +00:00
|
|
|
/*
|
2013-03-04 12:48:41 +00:00
|
|
|
* We delay one second at a time to avoid overflowing the
|
2011-11-20 08:29:23 +00:00
|
|
|
* system specific DELAY() function(s):
|
2011-11-19 11:17:27 +00:00
|
|
|
*/
|
2013-08-30 10:39:56 +00:00
|
|
|
while (sbt >= SBT_1S) {
|
2011-11-19 11:17:27 +00:00
|
|
|
DELAY(1000000);
|
2013-08-30 10:39:56 +00:00
|
|
|
sbt -= SBT_1S;
|
2011-11-19 11:17:27 +00:00
|
|
|
}
|
2013-08-30 10:39:56 +00:00
|
|
|
/* Do the delay remainder, if any */
|
2016-04-26 15:38:17 +00:00
|
|
|
sbt = howmany(sbt, SBT_1US);
|
2013-08-30 10:39:56 +00:00
|
|
|
if (sbt > 0)
|
|
|
|
DELAY(sbt);
|
2018-03-03 18:12:21 +00:00
|
|
|
return (EWOULDBLOCK);
|
2011-11-19 11:17:27 +00:00
|
|
|
}
|
2018-03-03 18:36:38 +00:00
|
|
|
return (_sleep(&pause_wchan[curcpu], NULL,
|
|
|
|
(flags & C_CATCH) ? PCATCH : 0, wmesg, sbt, pr, flags));
|
2007-02-23 16:22:09 +00:00
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2004-05-14 20:51:42 +00:00
|
|
|
* Make all threads sleeping on the specified identifier runnable.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
void
|
2019-12-24 16:19:33 +00:00
|
|
|
wakeup(const void *ident)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
int wakeup_swapper;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2004-10-12 18:36:20 +00:00
|
|
|
sleepq_lock(ident);
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
|
2008-03-12 06:31:06 +00:00
|
|
|
sleepq_release(ident);
|
2009-07-14 22:50:41 +00:00
|
|
|
if (wakeup_swapper) {
|
|
|
|
KASSERT(ident != &proc0,
|
|
|
|
("wakeup and wakeup_swapper and proc0"));
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
kick_proc0();
|
2009-07-14 22:50:41 +00:00
|
|
|
}
|
1996-07-31 09:26:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2004-05-14 20:51:42 +00:00
|
|
|
* Make a thread sleeping on the specified identifier runnable.
|
|
|
|
* May wake more than one thread if a target thread is currently
|
2001-07-06 01:16:43 +00:00
|
|
|
* swapped out.
|
1996-07-31 09:26:54 +00:00
|
|
|
*/
|
|
|
|
void
|
2019-12-24 16:19:33 +00:00
|
|
|
wakeup_one(const void *ident)
|
1996-07-31 09:26:54 +00:00
|
|
|
{
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
int wakeup_swapper;
|
1996-07-31 09:26:54 +00:00
|
|
|
|
2004-10-12 18:36:20 +00:00
|
|
|
sleepq_lock(ident);
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
|
Commit 2/14 of sched_lock decomposition.
- Adapt sleepqueues to the new thread_lock() mechanism.
- Delay assigning the sleep queue spinlock as the thread lock until after
we've checked for signals. It is illegal for a thread to return in
mi_switch() with any lock assigned to td_lock other than the scheduler
locks.
- Change sleepq_catch_signals() to do the switch if necessary to simplify
the callers.
- Simplify timeout handling now that locking a sleeping thread has the
side-effect of locking the sleepqueue. Some previous races are no
longer possible.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-04 23:50:56 +00:00
|
|
|
sleepq_release(ident);
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
if (wakeup_swapper)
|
|
|
|
kick_proc0();
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
Add wakeup_any(), cheaper wakeup_one() for taskqueue(9).
wakeup_one() and underlying sleepq_signal() spend additional time trying
to be fair, waking thread with highest priority, sleeping longest time.
But in case of taskqueue there are many absolutely identical threads, and
any fairness between them is quite pointless. It makes even worse, since
round-robin wakeups not only make previous CPU affinity in scheduler quite
useless, but also hide from user chance to see CPU bottlenecks, when
sequential workload with one request at a time looks evenly distributed
between multiple threads.
This change adds new SLEEPQ_UNFAIR flag to sleepq_signal(), making it wakeup
thread that went to sleep last, but no longer in context switch (to avoid
immediate spinning on the thread lock). On top of that new wakeup_any()
function is added, equivalent to wakeup_one(), but setting the flag.
On top of that taskqueue(9) is switched to wakeup_any() to wake up its
threads.
As a result, on a 72-core Xeon v4 machine, sequential ZFS writes to 12 ZVOLs
with 16KB block size spend 34% less time in wakeup_any() and descendants
than they were spending in wakeup_one(), and total write throughput increased
by ~10% with the same CPU usage as before.
Reviewed by: markj, mmacy
MFC after: 2 weeks
Sponsored by: iXsystems, Inc.
Differential Revision: https://reviews.freebsd.org/D20669
2019-06-20 01:15:33 +00:00
|
|
|
void
|
2019-12-24 16:19:33 +00:00
|
|
|
wakeup_any(const void *ident)
|
Add wakeup_any(), cheaper wakeup_one() for taskqueue(9).
wakeup_one() and underlying sleepq_signal() spend additional time trying
to be fair, waking thread with highest priority, sleeping longest time.
But in case of taskqueue there are many absolutely identical threads, and
any fairness between them is quite pointless. It makes even worse, since
round-robin wakeups not only make previous CPU affinity in scheduler quite
useless, but also hide from user chance to see CPU bottlenecks, when
sequential workload with one request at a time looks evenly distributed
between multiple threads.
This change adds new SLEEPQ_UNFAIR flag to sleepq_signal(), making it wakeup
thread that went to sleep last, but no longer in context switch (to avoid
immediate spinning on the thread lock). On top of that new wakeup_any()
function is added, equivalent to wakeup_one(), but setting the flag.
On top of that taskqueue(9) is switchied to wakeup_any() to wakeup its
threads.
As result, on 72-core Xeon v4 machine sequential ZFS write to 12 ZVOLs
with 16KB block size spend 34% less time in wakeup_any() and descendants
then it was spending in wakeup_one(), and total write throughput increased
by ~10% with the same as before CPU usage.
Reviewed by: markj, mmacy
MFC after: 2 weeks
Sponsored by: iXsystems, Inc.
Differential Revision: https://reviews.freebsd.org/D20669
2019-06-20 01:15:33 +00:00
|
|
|
{
|
|
|
|
int wakeup_swapper;
|
|
|
|
|
|
|
|
sleepq_lock(ident);
|
|
|
|
wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR,
|
|
|
|
0, 0);
|
|
|
|
sleepq_release(ident);
|
|
|
|
if (wakeup_swapper)
|
|
|
|
kick_proc0();
|
|
|
|
}
|
|
|
|
|
2020-02-28 16:05:18 +00:00
|
|
|
/*
|
|
|
|
* Signal sleeping waiters after the counter has reached zero.
|
|
|
|
*/
|
|
|
|
void
|
|
|
|
_blockcount_wakeup(blockcount_t *bc, u_int old)
|
|
|
|
{
|
|
|
|
|
|
|
|
KASSERT(_BLOCKCOUNT_WAITERS(old),
|
|
|
|
("%s: no waiters on %p", __func__, bc));
|
|
|
|
|
|
|
|
if (atomic_cmpset_int(&bc->__count, _BLOCKCOUNT_WAITERS_FLAG, 0))
|
|
|
|
wakeup(bc);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2020-04-21 17:13:06 +00:00
|
|
|
* Wait for a wakeup or a signal. This does not guarantee that the count is
|
|
|
|
* still zero on return. Callers wanting a precise answer should use
|
|
|
|
* blockcount_wait() with an interlock.
|
2020-02-28 16:05:18 +00:00
|
|
|
*
|
2020-04-21 17:13:06 +00:00
|
|
|
* If there is no work to wait for, return 0. If the sleep was interrupted by a
|
|
|
|
* signal, return EINTR or ERESTART, and return EAGAIN otherwise.
|
2020-02-28 16:05:18 +00:00
|
|
|
*/
|
|
|
|
int
|
|
|
|
_blockcount_sleep(blockcount_t *bc, struct lock_object *lock, const char *wmesg,
|
|
|
|
int prio)
|
|
|
|
{
|
|
|
|
void *wchan;
|
|
|
|
uintptr_t lock_state;
|
|
|
|
u_int old;
|
|
|
|
int ret;
|
2020-04-21 17:13:06 +00:00
|
|
|
bool catch, drop;
|
2020-02-28 16:05:18 +00:00
|
|
|
|
|
|
|
KASSERT(lock != &Giant.lock_object,
|
|
|
|
("%s: cannot use Giant as the interlock", __func__));
|
|
|
|
|
2020-04-21 17:13:06 +00:00
|
|
|
catch = (prio & PCATCH) != 0;
|
|
|
|
drop = (prio & PDROP) != 0;
|
|
|
|
prio &= PRIMASK;
|
|
|
|
|
2020-02-28 16:05:18 +00:00
|
|
|
/*
|
|
|
|
* Synchronize with the fence in blockcount_release(). If we end up
|
|
|
|
* waiting, the sleepqueue lock acquisition will provide the required
|
|
|
|
* side effects.
|
|
|
|
*
|
|
|
|
* If there is no work to wait for, but waiters are present, try to put
|
|
|
|
* ourselves to sleep to avoid jumping ahead.
|
|
|
|
*/
|
|
|
|
if (atomic_load_acq_int(&bc->__count) == 0) {
|
2020-04-21 17:13:06 +00:00
|
|
|
if (lock != NULL && drop)
|
2020-02-28 16:05:18 +00:00
|
|
|
LOCK_CLASS(lock)->lc_unlock(lock);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
lock_state = 0;
|
|
|
|
wchan = bc;
|
|
|
|
sleepq_lock(wchan);
|
|
|
|
DROP_GIANT();
|
|
|
|
if (lock != NULL)
|
|
|
|
lock_state = LOCK_CLASS(lock)->lc_unlock(lock);
|
|
|
|
old = blockcount_read(bc);
|
2020-04-21 17:13:06 +00:00
|
|
|
ret = 0;
|
2020-02-28 16:05:18 +00:00
|
|
|
do {
|
|
|
|
if (_BLOCKCOUNT_COUNT(old) == 0) {
|
|
|
|
sleepq_release(wchan);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
if (_BLOCKCOUNT_WAITERS(old))
|
|
|
|
break;
|
|
|
|
} while (!atomic_fcmpset_int(&bc->__count, &old,
|
|
|
|
old | _BLOCKCOUNT_WAITERS_FLAG));
|
2020-04-21 17:13:06 +00:00
|
|
|
sleepq_add(wchan, NULL, wmesg, catch ? SLEEPQ_INTERRUPTIBLE : 0, 0);
|
|
|
|
if (catch)
|
|
|
|
ret = sleepq_wait_sig(wchan, prio);
|
|
|
|
else
|
|
|
|
sleepq_wait(wchan, prio);
|
|
|
|
if (ret == 0)
|
|
|
|
ret = EAGAIN;
|
2020-02-28 16:05:18 +00:00
|
|
|
|
|
|
|
out:
|
|
|
|
PICKUP_GIANT();
|
2020-04-21 17:13:06 +00:00
|
|
|
if (lock != NULL && !drop)
|
2020-02-28 16:05:18 +00:00
|
|
|
LOCK_CLASS(lock)->lc_lock(lock, lock_state);
|
|
|
|
|
|
|
|
return (ret);
|
|
|
|
}
|
|
|
|
|
2008-03-10 03:16:51 +00:00
|
|
|
static void
|
|
|
|
kdb_switch(void)
|
|
|
|
{
|
|
|
|
thread_unlock(curthread);
|
|
|
|
kdb_backtrace();
|
|
|
|
kdb_reenter();
|
|
|
|
panic("%s: did not reenter debugger", __func__);
|
|
|
|
}
|
|
|
|
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
2004-05-14 20:51:42 +00:00
|
|
|
* The machine independent parts of context switching.
|
2019-12-15 21:26:50 +00:00
|
|
|
*
|
|
|
|
* The thread lock is required on entry and is no longer held on return.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
|
|
|
void
|
2019-12-15 21:26:50 +00:00
|
|
|
mi_switch(int flags)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2008-01-10 22:11:20 +00:00
|
|
|
uint64_t runtime, new_switchtime;
|
2003-04-02 23:53:30 +00:00
|
|
|
struct thread *td;
|
1994-05-24 10:09:53 +00:00
|
|
|
|
2003-04-02 23:53:30 +00:00
|
|
|
td = curthread; /* XXX */
|
Commit 2/14 of sched_lock decomposition.
- Adapt sleepqueues to the new thread_lock() mechanism.
- Delay assigning the sleep queue spinlock as the thread lock until after
we've checked for signals. It is illegal for a thread to return in
mi_switch() with any lock assigned to td_lock other than the scheduler
locks.
- Change sleepq_catch_signals() to do the switch if necessary to simplify
the callers.
- Simplify timeout handling now that locking a sleeping thread has the
side-effect of locking the sleepqueue. Some previous races are no
longer possible.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-04 23:50:56 +00:00
|
|
|
THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
|
2002-09-11 08:13:56 +00:00
|
|
|
KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
|
2001-10-23 17:52:49 +00:00
|
|
|
#ifdef INVARIANTS
|
2003-05-05 21:12:36 +00:00
|
|
|
if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
|
2001-10-23 17:52:49 +00:00
|
|
|
mtx_assert(&Giant, MA_NOTOWNED);
|
|
|
|
#endif
|
2020-01-12 06:07:54 +00:00
|
|
|
KASSERT(td->td_critnest == 1 || KERNEL_PANICKED(),
|
|
|
|
("mi_switch: switch in a critical section"));
|
2004-01-25 03:54:52 +00:00
|
|
|
KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
|
|
|
|
("mi_switch: switch must be voluntary or involuntary"));
|
2000-09-07 01:33:02 +00:00
|
|
|
|
2006-06-03 20:49:44 +00:00
|
|
|
/*
|
|
|
|
* Don't perform context switches from the debugger.
|
|
|
|
*/
|
2008-03-10 03:16:51 +00:00
|
|
|
if (kdb_active)
|
|
|
|
kdb_switch();
|
2017-02-17 06:45:04 +00:00
|
|
|
if (SCHEDULER_STOPPED_TD(td))
|
panic: add a switch and infrastructure for stopping other CPUs in SMP case
Historical behavior of letting other CPUs merily go on is a default for
time being. The new behavior can be switched on via
kern.stop_scheduler_on_panic tunable and sysctl.
Stopping of the CPUs has (at least) the following benefits:
- more of the system state at panic time is preserved intact
- threads and interrupts do not interfere with dumping of the system
state
Only one thread runs uninterrupted after panic if stop_scheduler_on_panic
is set. That thread might call code that is also used in normal context
and that code might use locks to prevent concurrent execution of certain
parts. Those locks might be held by the stopped threads and would never
be released. To work around this issue, it was decided that instead of
explicit checks for panic context, we would rather put those checks
inside the locking primitives.
This change has substantial portions written and re-written by attilio
and kib at various times. Other changes are heavily based on the ideas
and patches submitted by jhb and mdf. bde has provided many insights
into the details and history of the current code.
The new behavior may cause problems for systems that use a USB keyboard
for interfacing with system console. This is because of some unusual
locking patterns in the ukbd code which have to be used because on one
hand ukbd is below syscons, but on the other hand it has to interface
with other usb code that uses regular mutexes/Giant for its concurrency
protection. Dumping to USB-connected disks may also be affected.
PR: amd64/139614 (at least)
In cooperation with: attilio, jhb, kib, mdf
Discussed with: arch@, bde
Tested by: Eugene Grosbein <eugen@grosbein.net>,
gnn,
Steven Hartland <killing@multiplay.co.uk>,
glebius,
Andrew Boyer <aboyer@averesystems.com>
(various versions of the patch)
MFC after: 3 months (or never)
2011-12-11 21:02:01 +00:00
|
|
|
return;
|
2011-02-08 00:16:36 +00:00
|
|
|
if (flags & SW_VOL) {
|
2007-06-01 01:12:45 +00:00
|
|
|
td->td_ru.ru_nvcsw++;
|
2011-02-08 00:16:36 +00:00
|
|
|
td->td_swvoltick = ticks;
|
2016-03-25 19:35:29 +00:00
|
|
|
} else {
|
2007-06-01 01:12:45 +00:00
|
|
|
td->td_ru.ru_nivcsw++;
|
2016-03-25 19:35:29 +00:00
|
|
|
td->td_swinvoltick = ticks;
|
|
|
|
}
|
2008-04-17 04:20:10 +00:00
|
|
|
#ifdef SCHED_STATS
|
|
|
|
SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
|
|
|
|
#endif
|
1994-05-24 10:09:53 +00:00
|
|
|
/*
|
|
|
|
* Compute the amount of time during which the current
|
2007-06-01 01:12:45 +00:00
|
|
|
* thread was running, and add that to its total so far.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
2006-02-07 21:22:02 +00:00
|
|
|
new_switchtime = cpu_ticks();
|
2008-01-10 22:11:20 +00:00
|
|
|
runtime = new_switchtime - PCPU_GET(switchtime);
|
|
|
|
td->td_runtime += runtime;
|
|
|
|
td->td_incruntime += runtime;
|
2007-06-01 01:12:45 +00:00
|
|
|
PCPU_SET(switchtime, new_switchtime);
|
2003-10-05 09:35:08 +00:00
|
|
|
td->td_generation++; /* bump preempt-detect counter */
|
- Remove 'struct vmmeter' from 'struct pcpu', leaving only global vmmeter
in place. To do per-cpu stats, convert all fields that previously were
maintained in the vmmeters that sit in pcpus to counter(9).
- Since some vmmeter stats may be touched at very early stages of boot,
before we have set up UMA and we can do counter_u64_alloc(), provide an
early counter mechanism:
o Leave one spare uint64_t in struct pcpu, named pc_early_dummy_counter.
o Point counter(9) fields of vmmeter to pcpu[0].pc_early_dummy_counter,
so that at early stages of boot, before counters are allocated we already
point to a counter that can be safely written to.
o For sparc64 that required a whole dummy pcpu[MAXCPU] array.
Further related changes:
- Don't include vmmeter.h into pcpu.h.
- vm.stats.vm.v_swappgsout and vm.stats.vm.v_swappgsin changed to 64-bit,
to match kernel representation.
- struct vmmeter hidden under _KERNEL, and only vmstat(1) is an exclusion.
This is based on benno@'s 4-year old patch:
https://lists.freebsd.org/pipermail/freebsd-arch/2013-July/014471.html
Reviewed by: kib, gallatin, marius, lidl
Differential Revision: https://reviews.freebsd.org/D10156
2017-04-17 17:34:47 +00:00
|
|
|
VM_CNT_INC(v_swtch);
|
2003-10-29 15:23:09 +00:00
|
|
|
PCPU_SET(switchticks, ticks);
|
2008-03-12 10:12:01 +00:00
|
|
|
CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)",
|
2016-06-05 17:04:03 +00:00
|
|
|
td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
|
2017-03-25 19:08:51 +00:00
|
|
|
#ifdef KDTRACE_HOOKS
|
2018-12-08 06:30:41 +00:00
|
|
|
if (SDT_PROBES_ENABLED() &&
|
2018-05-22 08:27:33 +00:00
|
|
|
((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 &&
|
|
|
|
(flags & SW_TYPE_MASK) == SWT_NEEDRESCHED)))
|
2017-03-25 19:08:51 +00:00
|
|
|
SDT_PROBE0(sched, , , preempt);
|
|
|
|
#endif
|
2019-12-15 21:26:50 +00:00
|
|
|
sched_switch(td, flags);
|
2008-03-12 10:12:01 +00:00
|
|
|
CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
|
2016-06-05 17:04:03 +00:00
|
|
|
td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
|
2003-10-29 15:23:09 +00:00
|
|
|
|
2002-12-10 02:33:45 +00:00
|
|
|
/*
|
|
|
|
* If the last thread was exiting, finish cleaning it up.
|
|
|
|
*/
|
|
|
|
if ((td = PCPU_GET(deadthread))) {
|
|
|
|
PCPU_SET(deadthread, NULL);
|
|
|
|
thread_stash(td);
|
|
|
|
}
|
2019-12-15 21:26:50 +00:00
|
|
|
spinlock_exit();
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
* Change thread state to be runnable, placing it on the run queue if
|
|
|
|
* it is in memory. If it is swapped out, return true so our caller
|
|
|
|
* will know to awaken the swapper.
|
2019-12-15 21:11:15 +00:00
|
|
|
*
|
|
|
|
* Requires the thread lock on entry, drops on exit.
|
1994-05-24 10:09:53 +00:00
|
|
|
*/
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
int
|
2019-12-15 21:11:15 +00:00
|
|
|
setrunnable(struct thread *td, int srqflags)
|
1994-05-24 10:09:53 +00:00
|
|
|
{
|
2019-12-15 21:11:15 +00:00
|
|
|
int swapin;
|
2001-09-21 19:16:12 +00:00
|
|
|
|
Commit 2/14 of sched_lock decomposition.
- Adapt sleepqueues to the new thread_lock() mechanism.
- Delay assigning the sleep queue spinlock as the thread lock until after
we've checked for signals. It is illegal for a thread to return in
mi_switch() with any lock assigned to td_lock other than the scheduler
locks.
- Change sleepq_catch_signals() to do the switch if necessary to simplify
the callers.
- Simplify timeout handling now that locking a sleeping thread has the
side-effect of locking the sleepqueue. Some previous races are no
longer possible.
Tested by: kris, current@
Tested on: i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with: kris, attilio, kmacy, jhb, julian, bde (small parts each)
2007-06-04 23:50:56 +00:00
|
|
|
THREAD_LOCK_ASSERT(td, MA_OWNED);
|
2007-09-17 05:31:39 +00:00
|
|
|
KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
|
|
|
|
("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
|
2019-12-15 21:11:15 +00:00
|
|
|
|
|
|
|
swapin = 0;
|
2021-02-18 10:25:10 +00:00
|
|
|
switch (TD_GET_STATE(td)) {
|
Part 1 of KSE-III
The ability to schedule multiple threads per process
(one one cpu) by making ALL system calls optionally asynchronous.
to come: ia64 and power-pc patches, patches for gdb, test program (in tools)
Reviewed by: Almost everyone who counts
(at various times, peter, jhb, matt, alfred, mini, bernd,
and a cast of thousands)
NOTE: this is still Beta code, and contains lots of debugging stuff.
expect slight instability in signals..
2002-06-29 17:26:22 +00:00
|
|
|
case TDS_RUNNING:
|
2002-09-11 08:13:56 +00:00
|
|
|
case TDS_RUNQ:
|
2019-12-15 21:11:15 +00:00
|
|
|
break;
|
|
|
|
case TDS_CAN_RUN:
|
|
|
|
KASSERT((td->td_flags & TDF_INMEM) != 0,
|
|
|
|
("setrunnable: td %p not in mem, flags 0x%X inhibit 0x%X",
|
|
|
|
td, td->td_flags, td->td_inhibitors));
|
|
|
|
/* unlocks thread lock according to flags */
|
|
|
|
sched_wakeup(td, srqflags);
|
If a thread that is swapped out is made runnable, then the setrunnable()
routine wakes up proc0 so that proc0 can swap the thread back in.
Historically, this has been done by waking up proc0 directly from
setrunnable() itself via a wakeup(). When waking up a sleeping thread
that was swapped out (the usual case when waking proc0 since only sleeping
threads are eligible to be swapped out), this resulted in a bit of
recursion (e.g. wakeup() -> setrunnable() -> wakeup()).
With sleep queues having separate locks in 6.x and later, this caused a
spin lock LOR (sleepq lock -> sched_lock/thread lock -> sleepq lock).
An attempt was made to fix this in 7.0 by making the proc0 wakeup use
the ithread mechanism for doing the wakeup. However, this required
grabbing proc0's thread lock to perform the wakeup. If proc0 was asleep
elsewhere in the kernel (e.g. waiting for disk I/O), then this degenerated
into the same LOR since the thread lock would be some other sleepq lock.
Fix this by deferring the wakeup of the swapper until after the sleepq
lock held by the upper layer has been locked. The setrunnable() routine
now returns a boolean value to indicate whether or not proc0 needs to be
woken up. The end result is that consumers of the sleepq API such as
*sleep/wakeup, condition variables, sx locks, and lockmgr, have to wakeup
proc0 if they get a non-zero return value from sleepq_abort(),
sleepq_broadcast(), or sleepq_signal().
Discussed with: jeff
Glanced at by: sam
Tested by: Jurgen Weber jurgen - ish com au
MFC after: 2 weeks
2008-08-05 20:02:31 +00:00
|
|
|
return (0);
|
2002-09-11 08:13:56 +00:00
|
|
|
case TDS_INHIBITED:
|
|
|
|
/*
|
|
|
|
* If we are only inhibited because we are swapped out
|
2019-12-15 21:11:15 +00:00
|
|
|
* arrange to swap in this process.
|
2002-09-11 08:13:56 +00:00
|
|
|
*/
|
2019-12-15 21:11:15 +00:00
|
|
|
if (td->td_inhibitors == TDI_SWAPPED &&
|
|
|
|
(td->td_flags & TDF_SWAPINREQ) == 0) {
|
|
|
|
td->td_flags |= TDF_SWAPINREQ;
|
|
|
|
swapin = 1;
|
|
|
|
}
|
2002-09-11 08:13:56 +00:00
|
|
|
break;
|
1994-05-24 10:09:53 +00:00
|
|
|
default:
|
2021-02-18 10:25:10 +00:00
|
|
|
panic("setrunnable: state 0x%x", TD_GET_STATE(td));
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
2019-12-15 21:11:15 +00:00
|
|
|
if ((srqflags & (SRQ_HOLD | SRQ_HOLDTD)) == 0)
|
|
|
|
thread_unlock(td);
|
|
|
|
|
|
|
|
return (swapin);
|
1994-05-24 10:09:53 +00:00
|
|
|
}
|
1997-11-25 07:07:48 +00:00
|
|
|
|
2001-10-20 13:10:43 +00:00
|
|
|
/*
|
|
|
|
* Compute a tenex style load average of a quantity on
|
|
|
|
* 1, 5 and 15 minute intervals.
|
|
|
|
*/
|
|
|
|
static void
|
2004-02-01 05:37:36 +00:00
|
|
|
loadav(void *arg)
|
2001-10-20 13:10:43 +00:00
|
|
|
{
|
|
|
|
int i, nrun;
|
2001-10-20 16:07:17 +00:00
|
|
|
struct loadavg *avg;
|
2001-10-20 13:10:43 +00:00
|
|
|
|
2004-02-01 02:51:33 +00:00
|
|
|
nrun = sched_load();
|
2001-10-20 16:07:17 +00:00
|
|
|
avg = &averunnable;
|
2004-02-01 02:51:33 +00:00
|
|
|
|
2001-10-20 13:10:43 +00:00
|
|
|
for (i = 0; i < 3; i++)
|
|
|
|
avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
|
|
|
|
nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
|
2001-10-20 16:07:17 +00:00
|
|
|
|
2004-02-01 05:37:36 +00:00
|
|
|
/*
|
|
|
|
* Schedule the next update to occur after 5 seconds, but add a
|
|
|
|
* random variation to avoid synchronisation with processes that
|
|
|
|
* run at regular intervals.
|
|
|
|
*/
|
2013-03-04 11:22:19 +00:00
|
|
|
callout_reset_sbt(&loadav_callout,
|
2013-09-24 07:03:16 +00:00
|
|
|
SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US,
|
|
|
|
loadav, NULL, C_DIRECT_EXEC | C_PREL(32));
|
2001-10-20 13:10:43 +00:00
|
|
|
}
|
|
|
|
|
1997-11-25 07:07:48 +00:00
|
|
|
/* ARGSUSED */
|
|
|
|
static void
|
2008-03-16 18:59:21 +00:00
|
|
|
synch_setup(void *dummy)
|
1997-11-25 07:07:48 +00:00
|
|
|
{
|
2015-05-22 17:05:21 +00:00
|
|
|
callout_init(&loadav_callout, 1);
|
2000-11-27 22:52:31 +00:00
|
|
|
|
1997-11-25 07:07:48 +00:00
|
|
|
/* Kick off timeout driven events by calling first time. */
|
2004-02-01 05:37:36 +00:00
|
|
|
loadav(NULL);
|
1997-11-25 07:07:48 +00:00
|
|
|
}
|
|
|
|
|
2011-02-08 00:16:36 +00:00
|
|
|
int
|
|
|
|
should_yield(void)
|
|
|
|
{
|
|
|
|
|
2013-11-26 14:00:50 +00:00
|
|
|
return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks);
|
2011-02-08 00:16:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
maybe_yield(void)
|
|
|
|
{
|
|
|
|
|
|
|
|
if (should_yield())
|
2011-05-13 05:27:58 +00:00
|
|
|
kern_yield(PRI_USER);
|
2011-02-08 00:16:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
kern_yield(int prio)
|
|
|
|
{
|
|
|
|
struct thread *td;
|
|
|
|
|
|
|
|
td = curthread;
|
|
|
|
DROP_GIANT();
|
|
|
|
thread_lock(td);
|
2011-05-13 05:27:58 +00:00
|
|
|
if (prio == PRI_USER)
|
|
|
|
prio = td->td_user_pri;
|
2011-02-08 00:16:36 +00:00
|
|
|
if (prio >= 0)
|
|
|
|
sched_prio(td, prio);
|
2019-12-15 21:26:50 +00:00
|
|
|
mi_switch(SW_VOL | SWT_RELINQUISH);
|
2011-02-08 00:16:36 +00:00
|
|
|
PICKUP_GIANT();
|
|
|
|
}
|
|
|
|
|
2000-12-02 05:41:30 +00:00
|
|
|
/*
|
2007-03-05 13:10:58 +00:00
|
|
|
* General purpose yield system call.
|
2000-12-02 05:41:30 +00:00
|
|
|
*/
|
|
|
|
int
|
2011-09-16 13:58:51 +00:00
|
|
|
sys_yield(struct thread *td, struct yield_args *uap)
|
2000-12-02 05:41:30 +00:00
|
|
|
{
|
2007-10-08 23:40:40 +00:00
|
|
|
|
|
|
|
thread_lock(td);
|
2011-01-06 22:19:15 +00:00
|
|
|
if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
|
|
|
|
sched_prio(td, PRI_MAX_TIMESHARE);
|
2019-12-15 21:26:50 +00:00
|
|
|
mi_switch(SW_VOL | SWT_RELINQUISH);
|
2007-10-08 23:40:40 +00:00
|
|
|
td->td_retval[0] = 0;
|
2000-12-02 05:41:30 +00:00
|
|
|
return (0);
|
|
|
|
}
|