d5a08a6065
- All processes go into the same array of queues, with different scheduling classes using different portions of the array. This allows user processes to have their priorities propagated up into interrupt thread range if need be. - I chose 64 run queues as an arbitrary number that is greater than 32. We used to have 4 separate arrays of 32 queues each, so this may not be optimal. The new run queue code was written with this in mind; changing the number of run queues only requires changing constants in runq.h and adjusting the priority levels. - The new run queue code takes the run queue as a parameter. This is intended to be used to create per-CPU run queues. Implement wrappers for compatibility with the old interface which pass in the global run queue structure. - Group the priority level, user priority, native priority (before propagation) and the scheduling class into a struct priority. - Change any hard-coded priority levels that I found to use symbolic constants (TTIPRI and TTOPRI). - Remove the curpriority global variable and use that of curproc. This was used to detect when a process' priority had lowered and it should yield. We now effectively yield on every interrupt. - Activate propogate_priority(). It should now have the desired effect without needing to also propagate the scheduling class. - Temporarily comment out the call to vm_page_zero_idle() in the idle loop. It interfered with propogate_priority() because the idle process needed to do a non-blocking acquire of Giant and then other processes would try to propagate their priority onto it. The idle process should not do anything except idle. vm_page_zero_idle() will return in the form of an idle priority kernel thread which is woken up at appropriate times by the vm system. - Update struct kinfo_proc to the new priority interface. Deliberately change its size by adjusting the spare fields. 
It remained the same size, but the layout has changed, so userland processes that use it would parse the data incorrectly. The size constraint should really be changed to an arbitrary version number. Also add a debug.sizeof sysctl node for struct kinfo_proc.
538 lines
13 KiB
C
538 lines
13 KiB
C
/*
|
|
* Copyright (c) 1997, Stefan Esser <se@freebsd.org>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice unmodified, this list of conditions, and the following
|
|
* disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*
|
|
*/
|
|
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/bus.h>
|
|
#include <sys/rtprio.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/ipl.h>
|
|
#include <sys/interrupt.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/kthread.h>
|
|
#include <sys/ktr.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/resourcevar.h>
|
|
#include <sys/unistd.h>
|
|
#include <sys/vmmeter.h>
|
|
#include <machine/atomic.h>
|
|
#include <machine/cpu.h>
|
|
#include <machine/md_var.h>
|
|
#include <machine/stdarg.h>
|
|
|
|
#include <net/netisr.h> /* prototype for legacy_setsoftnet */
|
|
|
|
void *net_ih;
|
|
void *vm_ih;
|
|
void *softclock_ih;
|
|
struct ithd *clk_ithd;
|
|
struct ithd *tty_ithd;
|
|
|
|
static struct mtx ithread_list_lock;
|
|
|
|
static MALLOC_DEFINE(M_ITHREAD, "ithread", "Interrupt Threads");
|
|
|
|
static void ithread_update(struct ithd *);
|
|
static void ithread_loop(void *);
|
|
static void ithread_init(void *);
|
|
static void start_softintr(void *);
|
|
static void swi_net(void *);
|
|
|
|
u_char
|
|
ithread_priority(enum intr_type flags)
|
|
{
|
|
u_char pri;
|
|
|
|
flags &= (INTR_TYPE_TTY | INTR_TYPE_BIO | INTR_TYPE_NET |
|
|
INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK);
|
|
switch (flags) {
|
|
case INTR_TYPE_TTY:
|
|
pri = PI_TTYLOW;
|
|
break;
|
|
case INTR_TYPE_BIO:
|
|
/*
|
|
* XXX We need to refine this. BSD/OS distinguishes
|
|
* between tape and disk priorities.
|
|
*/
|
|
pri = PI_DISK;
|
|
break;
|
|
case INTR_TYPE_NET:
|
|
pri = PI_NET;
|
|
break;
|
|
case INTR_TYPE_CAM:
|
|
pri = PI_DISK; /* XXX or PI_CAM? */
|
|
break;
|
|
case INTR_TYPE_CLK:
|
|
pri = PI_REALTIME;
|
|
break;
|
|
case INTR_TYPE_MISC:
|
|
pri = PI_DULL; /* don't care */
|
|
break;
|
|
default:
|
|
/* We didn't specify an interrupt level. */
|
|
panic("ithread_priority: no interrupt type in flags");
|
|
}
|
|
|
|
return pri;
|
|
}
|
|
|
|
/*
|
|
* Regenerate the name (p_comm) and priority for a threaded interrupt thread.
|
|
*/
|
|
static void
|
|
ithread_update(struct ithd *ithd)
|
|
{
|
|
struct intrhand *ih;
|
|
struct proc *p;
|
|
int entropy;
|
|
|
|
p = ithd->it_proc;
|
|
if (p == NULL)
|
|
return;
|
|
|
|
strncpy(p->p_comm, ithd->it_name, sizeof(ithd->it_name));
|
|
ih = TAILQ_FIRST(&ithd->it_handlers);
|
|
if (ih == NULL) {
|
|
p->p_pri.pri_level = PRI_MAX_ITHD;
|
|
ithd->it_flags &= ~IT_ENTROPY;
|
|
return;
|
|
}
|
|
|
|
entropy = 0;
|
|
p->p_pri.pri_level = ih->ih_pri;
|
|
TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) {
|
|
if (strlen(p->p_comm) + strlen(ih->ih_name) + 1 <
|
|
sizeof(p->p_comm)) {
|
|
strcat(p->p_comm, " ");
|
|
strcat(p->p_comm, ih->ih_name);
|
|
} else if (strlen(p->p_comm) + 1 == sizeof(p->p_comm)) {
|
|
if (p->p_comm[sizeof(p->p_comm) - 2] == '+')
|
|
p->p_comm[sizeof(p->p_comm) - 2] = '*';
|
|
else
|
|
p->p_comm[sizeof(p->p_comm) - 2] = '+';
|
|
} else
|
|
strcat(p->p_comm, "+");
|
|
if (ih->ih_flags & IH_ENTROPY)
|
|
entropy++;
|
|
}
|
|
|
|
if (entropy) {
|
|
printf("Warning, ithread (%d, %s) is an entropy source.\n",
|
|
p->p_pid, p->p_comm);
|
|
ithd->it_flags |= IT_ENTROPY;
|
|
}
|
|
else
|
|
ithd->it_flags &= ~IT_ENTROPY;
|
|
}
|
|
|
|
int
|
|
ithread_create(struct ithd **ithread, int vector, int flags,
|
|
void (*disable)(int), void (*enable)(int), const char *fmt, ...)
|
|
{
|
|
struct ithd *ithd;
|
|
struct proc *p;
|
|
int error;
|
|
va_list ap;
|
|
|
|
ithd = malloc(sizeof(struct ithd), M_ITHREAD, M_WAITOK | M_ZERO);
|
|
ithd->it_vector = vector;
|
|
ithd->it_disable = disable;
|
|
ithd->it_enable = enable;
|
|
ithd->it_flags = flags;
|
|
TAILQ_INIT(&ithd->it_handlers);
|
|
|
|
va_start(ap, fmt);
|
|
vsnprintf(ithd->it_name, sizeof(ithd->it_name), fmt, ap);
|
|
va_end(ap);
|
|
|
|
error = kthread_create(ithread_loop, ithd, &p, RFSTOPPED | RFHIGHPID,
|
|
ithd->it_name);
|
|
if (error) {
|
|
free(ithd, M_ITHREAD);
|
|
return (error);
|
|
}
|
|
p->p_pri.pri_class = PRI_ITHD;
|
|
p->p_pri.pri_level = PRI_MAX_ITHD;
|
|
p->p_stat = SWAIT;
|
|
ithd->it_proc = p;
|
|
p->p_ithd = ithd;
|
|
if (ithread != NULL)
|
|
*ithread = ithd;
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
ithread_destroy(struct ithd *ithread)
|
|
{
|
|
|
|
if (ithread == NULL || !TAILQ_EMPTY(&ithread->it_handlers))
|
|
return (EINVAL);
|
|
|
|
mtx_lock_spin(&sched_lock);
|
|
ithread->it_flags |= IT_DEAD;
|
|
if (ithread->it_proc->p_stat == SWAIT) {
|
|
ithread->it_proc->p_stat = SRUN;
|
|
setrunqueue(ithread->it_proc);
|
|
}
|
|
mtx_unlock_spin(&sched_lock);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
ithread_add_handler(struct ithd* ithread, const char *name,
|
|
driver_intr_t handler, void *arg, u_char pri, enum intr_type flags,
|
|
void **cookiep)
|
|
{
|
|
struct intrhand *ih, *temp_ih;
|
|
|
|
if (ithread == NULL || name == NULL || handler == NULL)
|
|
return (EINVAL);
|
|
if ((flags & INTR_FAST) !=0)
|
|
flags |= INTR_EXCL;
|
|
|
|
ih = malloc(sizeof(struct intrhand), M_ITHREAD, M_WAITOK | M_ZERO);
|
|
ih->ih_handler = handler;
|
|
ih->ih_argument = arg;
|
|
ih->ih_name = name;
|
|
ih->ih_ithread = ithread;
|
|
ih->ih_pri = pri;
|
|
if (flags & INTR_FAST)
|
|
ih->ih_flags = IH_FAST | IH_EXCLUSIVE;
|
|
else if (flags & INTR_EXCL)
|
|
ih->ih_flags = IH_EXCLUSIVE;
|
|
if (flags & INTR_MPSAFE)
|
|
ih->ih_flags |= IH_MPSAFE;
|
|
if (flags & INTR_ENTROPY)
|
|
ih->ih_flags |= IH_ENTROPY;
|
|
|
|
mtx_lock_spin(&ithread_list_lock);
|
|
if ((flags & INTR_EXCL) !=0 && !TAILQ_EMPTY(&ithread->it_handlers))
|
|
goto fail;
|
|
if (!TAILQ_EMPTY(&ithread->it_handlers) &&
|
|
(TAILQ_FIRST(&ithread->it_handlers)->ih_flags & IH_EXCLUSIVE) != 0)
|
|
goto fail;
|
|
|
|
TAILQ_FOREACH(temp_ih, &ithread->it_handlers, ih_next)
|
|
if (temp_ih->ih_pri > ih->ih_pri)
|
|
break;
|
|
if (temp_ih == NULL)
|
|
TAILQ_INSERT_TAIL(&ithread->it_handlers, ih, ih_next);
|
|
else
|
|
TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
|
|
ithread_update(ithread);
|
|
mtx_unlock_spin(&ithread_list_lock);
|
|
|
|
if (cookiep != NULL)
|
|
*cookiep = ih;
|
|
return (0);
|
|
|
|
fail:
|
|
mtx_unlock_spin(&ithread_list_lock);
|
|
free(ih, M_ITHREAD);
|
|
return (EINVAL);
|
|
}
|
|
|
|
int
|
|
ithread_remove_handler(void *cookie)
|
|
{
|
|
struct intrhand *handler = (struct intrhand *)cookie;
|
|
struct ithd *ithread;
|
|
#ifdef INVARIANTS
|
|
struct intrhand *ih;
|
|
int found;
|
|
#endif
|
|
|
|
if (handler == NULL || (ithread = handler->ih_ithread) == NULL)
|
|
return (EINVAL);
|
|
|
|
mtx_lock_spin(&ithread_list_lock);
|
|
#ifdef INVARIANTS
|
|
found = 0;
|
|
TAILQ_FOREACH(ih, &ithread->it_handlers, ih_next)
|
|
if (ih == handler) {
|
|
found++;
|
|
break;
|
|
}
|
|
if (found == 0) {
|
|
mtx_unlock_spin(&ithread_list_lock);
|
|
return (EINVAL);
|
|
}
|
|
#endif
|
|
TAILQ_REMOVE(&ithread->it_handlers, handler, ih_next);
|
|
ithread_update(ithread);
|
|
mtx_unlock_spin(&ithread_list_lock);
|
|
|
|
free(handler, M_ITHREAD);
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
swi_add(struct ithd **ithdp, const char *name, driver_intr_t handler,
|
|
void *arg, int pri, enum intr_type flags, void **cookiep)
|
|
{
|
|
struct proc *p;
|
|
struct ithd *ithd;
|
|
int error;
|
|
|
|
ithd = (ithdp != NULL) ? *ithdp : NULL;
|
|
|
|
if (ithd == NULL) {
|
|
error = ithread_create(&ithd, pri, IT_SOFT, NULL, NULL,
|
|
"swi%d:", pri);
|
|
if (error)
|
|
return (error);
|
|
|
|
/* XXX - some hacks are _really_ gross */
|
|
p = ithd->it_proc;
|
|
PROC_LOCK(p);
|
|
if (pri == SWI_CLOCK)
|
|
p->p_flag |= P_NOLOAD;
|
|
PROC_UNLOCK(p);
|
|
if (ithdp != NULL)
|
|
*ithdp = ithd;
|
|
}
|
|
return (ithread_add_handler(ithd, name, handler, arg,
|
|
(pri * RQ_PPQ) + PI_SOFT, flags, cookiep));
|
|
}
|
|
|
|
|
|
/*
|
|
* Schedule a heavyweight software interrupt process.
|
|
*/
|
|
void
|
|
swi_sched(void *cookie, int flags)
|
|
{
|
|
struct intrhand *ih = (struct intrhand *)cookie;
|
|
struct ithd *it = ih->ih_ithread;
|
|
struct proc *p = it->it_proc;
|
|
|
|
atomic_add_int(&cnt.v_intr, 1); /* one more global interrupt */
|
|
|
|
CTR3(KTR_INTR, "swi_sched pid %d(%s) need=%d",
|
|
p->p_pid, p->p_comm, it->it_need);
|
|
|
|
/*
|
|
* Set it_need so that if the thread is already running but close
|
|
* to done, it will do another go-round. Then get the sched lock
|
|
* and see if the thread is on whichkqs yet. If not, put it on
|
|
* there. In any case, kick everyone so that if the new thread
|
|
* is higher priority than their current thread, it gets run now.
|
|
*/
|
|
atomic_store_rel_int(&ih->ih_need, 1);
|
|
if (!(flags & SWI_DELAY)) {
|
|
it->it_need = 1;
|
|
mtx_lock_spin(&sched_lock);
|
|
if (p->p_stat == SWAIT) { /* not on run queue */
|
|
CTR1(KTR_INTR, "swi_sched: setrunqueue %d", p->p_pid);
|
|
p->p_stat = SRUN;
|
|
setrunqueue(p);
|
|
if (!cold && flags & SWI_SWITCH) {
|
|
if (curproc != PCPU_GET(idleproc))
|
|
setrunqueue(curproc);
|
|
curproc->p_stats->p_ru.ru_nvcsw++;
|
|
mi_switch();
|
|
} else
|
|
need_resched();
|
|
}
|
|
else {
|
|
CTR3(KTR_INTR, "swi_sched %d: it_need %d, state %d",
|
|
p->p_pid, it->it_need, p->p_stat );
|
|
}
|
|
mtx_unlock_spin(&sched_lock);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* This is the main code for interrupt threads.
|
|
*/
|
|
void
|
|
ithread_loop(void *arg)
|
|
{
|
|
struct ithd *ithd; /* our thread context */
|
|
struct intrhand *ih; /* and our interrupt handler chain */
|
|
struct proc *p;
|
|
|
|
p = curproc;
|
|
ithd = (struct ithd *)arg; /* point to myself */
|
|
KASSERT(ithd->it_proc == p && p->p_ithd == ithd,
|
|
(__func__ ": ithread and proc linkage out of sync"));
|
|
|
|
/*
|
|
* As long as we have interrupts outstanding, go through the
|
|
* list of handlers, giving each one a go at it.
|
|
*/
|
|
for (;;) {
|
|
/*
|
|
* If we are an orphaned thread, then just die.
|
|
*/
|
|
if (ithd->it_flags & IT_DEAD) {
|
|
CTR2(KTR_INTR, __func__ ": pid %d: (%s) exiting",
|
|
p->p_pid, p->p_comm);
|
|
p->p_ithd = NULL;
|
|
mtx_lock(&Giant);
|
|
free(ithd, M_ITHREAD);
|
|
kthread_exit(0);
|
|
}
|
|
|
|
CTR3(KTR_INTR, __func__ ": pid %d: (%s) need=%d",
|
|
p->p_pid, p->p_comm, ithd->it_need);
|
|
while (ithd->it_need) {
|
|
/*
|
|
* Service interrupts. If another interrupt
|
|
* arrives while we are running, they will set
|
|
* it_need to denote that we should make
|
|
* another pass.
|
|
*/
|
|
atomic_store_rel_int(&ithd->it_need, 0);
|
|
TAILQ_FOREACH(ih, &ithd->it_handlers, ih_next) {
|
|
if (ithd->it_flags & IT_SOFT && !ih->ih_need)
|
|
continue;
|
|
atomic_store_rel_int(&ih->ih_need, 0);
|
|
CTR5(KTR_INTR,
|
|
__func__ ": pid %d ih=%p: %p(%p) flg=%x",
|
|
p->p_pid, (void *)ih,
|
|
(void *)ih->ih_handler, ih->ih_argument,
|
|
ih->ih_flags);
|
|
|
|
if ((ih->ih_flags & IH_MPSAFE) == 0)
|
|
mtx_lock(&Giant);
|
|
ih->ih_handler(ih->ih_argument);
|
|
if ((ih->ih_flags & IH_MPSAFE) == 0)
|
|
mtx_unlock(&Giant);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Processed all our interrupts. Now get the sched
|
|
* lock. This may take a while and it_need may get
|
|
* set again, so we have to check it again.
|
|
*/
|
|
mtx_assert(&Giant, MA_NOTOWNED);
|
|
mtx_lock_spin(&sched_lock);
|
|
if (!ithd->it_need) {
|
|
/*
|
|
* Should we call this earlier in the loop above?
|
|
*/
|
|
if (ithd->it_enable != NULL)
|
|
ithd->it_enable(ithd->it_vector);
|
|
p->p_stat = SWAIT; /* we're idle */
|
|
CTR1(KTR_INTR, __func__ ": pid %d: done", p->p_pid);
|
|
mi_switch();
|
|
CTR1(KTR_INTR, __func__ ": pid %d: resumed", p->p_pid);
|
|
}
|
|
mtx_unlock_spin(&sched_lock);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize mutex used to protect ithread handler lists.
|
|
*/
|
|
static void
|
|
ithread_init(void *dummy)
|
|
{
|
|
|
|
mtx_init(&ithread_list_lock, "ithread list lock", MTX_SPIN);
|
|
}
|
|
SYSINIT(ithread_init, SI_SUB_INTR, SI_ORDER_FIRST, ithread_init, NULL);
|
|
|
|
/*
|
|
* Start standard software interrupt threads
|
|
*/
|
|
static void
|
|
start_softintr(void *dummy)
|
|
{
|
|
|
|
if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, 0, &net_ih) ||
|
|
swi_add(&clk_ithd, "clock", softclock, NULL, SWI_CLOCK,
|
|
INTR_MPSAFE, &softclock_ih) ||
|
|
swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, 0, &vm_ih))
|
|
panic("died while creating standard software ithreads");
|
|
}
|
|
SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, NULL)
|
|
|
|
void
|
|
legacy_setsoftnet(void)
|
|
{
|
|
swi_sched(net_ih, SWI_NOSWITCH);
|
|
}
|
|
|
|
/*
|
|
* XXX: This should really be in the network code somewhere and installed
|
|
* via a SI_SUB_SOFINTR, SI_ORDER_MIDDLE sysinit.
|
|
*/
|
|
void (*netisrs[32]) __P((void));
|
|
u_int netisr;
|
|
|
|
int
|
|
register_netisr(num, handler)
|
|
int num;
|
|
netisr_t *handler;
|
|
{
|
|
|
|
if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
|
|
printf("register_netisr: bad isr number: %d\n", num);
|
|
return (EINVAL);
|
|
}
|
|
netisrs[num] = handler;
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
unregister_netisr(num)
|
|
int num;
|
|
{
|
|
|
|
if (num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs)) ) {
|
|
printf("unregister_netisr: bad isr number: %d\n", num);
|
|
return (EINVAL);
|
|
}
|
|
netisrs[num] = NULL;
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
swi_net(void *dummy)
|
|
{
|
|
u_int bits;
|
|
int i;
|
|
|
|
bits = atomic_readandclear_int(&netisr);
|
|
while ((i = ffs(bits)) != 0) {
|
|
i--;
|
|
if (netisrs[i] != NULL)
|
|
netisrs[i]();
|
|
else
|
|
printf("swi_net: unregistered isr number: %d.\n", i);
|
|
bits &= ~(1 << i);
|
|
}
|
|
}
|