 - Use a new flag, KEF_XFERABLE, to record with certainty that this kse had
   contributed to the transferable load count.  This prevents any potential
   problems with sched_pin() being used around calls to setrunqueue().
   (See the sketch after this list.)
 - Change the sched_add() load balancing algorithm to try to migrate on
   wakeup.  This attempts to place threads that communicate with each other
   on the same CPU.
 - Don't clear the idle counts in kseq_transfer(), let the cpus do that when
   they call sched_add() from kseq_assign().
 - Correct a few out of date comments.
 - Make sure the ke_cpu field is correct when we preempt.
 - Call kseq_assign() from sched_clock() to catch any assignments that were
   done without IPI.  Presently all assignments are done with an IPI, but I'm
   trying a patch that limits that.
 - Don't migrate a thread if it is still runnable in sched_add().  Previously,
   this could only happen for KSE threads, but due to changes to
   sched_switch() all threads went through this path.
 - Remove some code that was added with preemption but is not necessary.
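
For illustration, a minimal user-space sketch of the KEF_XFERABLE idea from the
first item (simplified stand-in types, not the kernel code): the enqueue path
records whether it counted the kse as transferable, and the dequeue path keys
its decrement off that record, so a thread whose migratability changes between
setrunqueue() and removal cannot unbalance the count.

    /*
     * Minimal model of the KEF_XFERABLE accounting.  All names and types here
     * are simplified stand-ins for the kernel structures.
     */
    #include <stdio.h>

    #define KEF_XFERABLE 0x01           /* counted in the transferable load */

    struct kse {                        /* simplified */
        int ke_flags;
        int ke_pinned;                  /* nonzero while pinned to this cpu */
    };

    struct kseq {                       /* simplified per-cpu queue */
        int ksq_transferable;
    };

    static int
    kse_can_migrate(const struct kse *ke)
    {
        return (ke->ke_pinned == 0);
    }

    static void
    kseq_runq_add(struct kseq *kseq, struct kse *ke)
    {
        if (kse_can_migrate(ke)) {
            kseq->ksq_transferable++;
            ke->ke_flags |= KEF_XFERABLE;       /* remember we counted it */
        }
    }

    static void
    kseq_runq_rem(struct kseq *kseq, struct kse *ke)
    {
        /* Decrement only if the add path counted us, whatever the pin state. */
        if (ke->ke_flags & KEF_XFERABLE) {
            kseq->ksq_transferable--;
            ke->ke_flags &= ~KEF_XFERABLE;
        }
    }

    int
    main(void)
    {
        struct kseq kseq = { 0 };
        struct kse ke = { 0, 0 };

        kseq_runq_add(&kseq, &ke);      /* migratable at enqueue: counted */
        ke.ke_pinned = 1;               /* pinned while on the run queue */
        kseq_runq_rem(&kseq, &ke);      /* still decremented: flag decides */
        printf("transferable = %d\n", kseq.ksq_transferable);  /* 0, not -1 */
        return (0);
    }
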
Jeff Roberson 2004-08-10 07:52:21 +00:00
parent a6c0289214
commit 2454aaf51c

@@ -100,6 +100,7 @@ struct ke_sched {
#define KEF_ASSIGNED KEF_SCHED0 /* KSE is being migrated. */
#define KEF_BOUND KEF_SCHED1 /* KSE can not migrate. */
#define KEF_XFERABLE KEF_SCHED2 /* KSE was added as transferable. */
struct kg_sched {
int skg_slptime; /* Number of ticks we vol. slept */
@@ -332,6 +333,7 @@ kseq_runq_add(struct kseq *kseq, struct kse *ke)
if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
kseq->ksq_transferable++;
kseq->ksq_group->ksg_transferable++;
ke->ke_flags |= KEF_XFERABLE;
}
#endif
runq_add(ke->ke_runq, ke);
@@ -341,9 +343,10 @@ static __inline void
kseq_runq_rem(struct kseq *kseq, struct kse *ke)
{
#ifdef SMP
if (KSE_CAN_MIGRATE(ke, PRI_BASE(ke->ke_ksegrp->kg_pri_class))) {
if (ke->ke_flags & KEF_XFERABLE) {
kseq->ksq_transferable--;
kseq->ksq_group->ksg_transferable--;
ke->ke_flags &= ~KEF_XFERABLE;
}
#endif
runq_remove(ke->ke_runq, ke);
@@ -651,9 +654,11 @@ kseq_notify(struct kse *ke, int cpu)
struct kseq *kseq;
struct thread *td;
struct pcpu *pcpu;
int prio;
ke->ke_cpu = cpu;
ke->ke_flags |= KEF_ASSIGNED;
prio = ke->ke_thread->td_priority;
kseq = KSEQ_CPU(cpu);
@@ -663,6 +668,12 @@ kseq_notify(struct kse *ke, int cpu)
do {
*(volatile struct kse **)&ke->ke_assign = kseq->ksq_assigned;
} while(!atomic_cmpset_ptr(&kseq->ksq_assigned, ke->ke_assign, ke));
/*
* Without sched_lock we could lose a race where we set NEEDRESCHED
* on a thread that is switched out before the IPI is delivered. This
* would lead us to miss the resched. This will be a problem once
* sched_lock is pushed down.
*/
pcpu = pcpu_find(cpu);
td = pcpu->pc_curthread;
if (ke->ke_thread->td_priority < td->td_priority ||
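
The kseq_notify() hunk above pushes the kse onto the remote cpu's ksq_assigned
list with a compare-and-swap loop, and the target cpu later detaches the whole
list in kseq_assign() (which sched_clock() now also calls, per the hunk further
down).  A rough user-space model of that hand-off, using C11 atomics in place
of atomic_cmpset_ptr() and an atomic exchange for the drain; the names are
stand-ins, not the kernel interfaces.

    /* Lock-free push of work items to another cpu, drained in one swap. */
    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    struct node {                       /* stand-in for a queued kse */
        int id;
        struct node *next;
    };

    static _Atomic(struct node *) assigned_head = NULL;

    static void
    notify(struct node *n)
    {
        struct node *head;

        /* Link in front of the current head until the CAS succeeds. */
        do {
            head = atomic_load(&assigned_head);
            n->next = head;
        } while (!atomic_compare_exchange_weak(&assigned_head, &head, n));
    }

    static void
    assign_all(void)
    {
        struct node *n;

        /* Detach the whole list in one step, then walk it. */
        n = atomic_exchange(&assigned_head, (struct node *)NULL);
        for (; n != NULL; n = n->next)
            printf("assigning %d\n", n->id);
    }

    int
    main(void)
    {
        struct node a = { 1, NULL }, b = { 2, NULL };

        notify(&a);
        notify(&b);
        assign_all();                   /* prints 2 then 1 (LIFO order) */
        return (0);
    }
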
@@ -727,43 +738,56 @@ kseq_transfer(struct kseq *kseq, struct kse *ke, int class)
if (smp_started == 0)
return (0);
cpu = 0;
ksg = kseq->ksq_group;
/*
* If there are any idle groups, give them our extra load. The
* threshold at which we start to reassign kses has a large impact
* If our load exceeds a certain threshold we should attempt to
* reassign this thread. The first candidate is the cpu that
* originally ran the thread. If it is idle, assign it there,
* otherwise, pick an idle cpu.
*
* The threshold at which we start to reassign kses has a large impact
* on the overall performance of the system. Tuned too high and
* some CPUs may idle. Too low and there will be excess migration
* and context switches.
*/
if (ksg->ksg_load > (ksg->ksg_cpus * 2) && kseq_idle) {
ksg = kseq->ksq_group;
if (ksg->ksg_load > ksg->ksg_cpus && kseq_idle) {
ksg = KSEQ_CPU(ke->ke_cpu)->ksq_group;
if (kseq_idle & ksg->ksg_mask) {
cpu = ffs(ksg->ksg_idlemask);
if (cpu)
goto migrate;
}
/*
* Multiple cpus could find this bit simultaneously
* but the race shouldn't be terrible.
*/
cpu = ffs(kseq_idle);
if (cpu)
atomic_clear_int(&kseq_idle, 1 << (cpu - 1));
goto migrate;
}
/*
* If another cpu in this group has idled, assign a thread over
* to them after checking to see if there are idled groups.
*/
if (cpu == 0 && kseq->ksq_load > 1 && ksg->ksg_idlemask) {
ksg = kseq->ksq_group;
if (ksg->ksg_idlemask) {
cpu = ffs(ksg->ksg_idlemask);
if (cpu)
ksg->ksg_idlemask &= ~(1 << (cpu - 1));
goto migrate;
}
/*
* No new CPU was found.
*/
return (0);
migrate:
/*
* Now that we've found an idle CPU, migrate the thread.
*/
if (cpu) {
cpu--;
ke->ke_runq = NULL;
kseq_notify(ke, cpu);
return (1);
}
return (0);
cpu--;
ke->ke_runq = NULL;
kseq_notify(ke, cpu);
return (1);
}
#endif /* SMP */
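
The rewritten kseq_transfer() above checks candidates in a fixed order: an idle
cpu in the group that last ran the thread, then any idle cpu system-wide, then
an idle sibling in the current group, and otherwise keeps the thread local.  A
simplified sketch of that decision order; the group/mask layout, helpers, and
thresholds here are stand-ins rather than the kernel's.

    /* Simplified model of the wakeup migration decision. */
    #include <stdio.h>
    #include <strings.h>                /* ffs() */

    struct group {
        unsigned int mask;              /* cpus belonging to this group */
        unsigned int idlemask;          /* idle cpus within the group */
        int load;
        int ncpus;
    };

    /*
     * Return the chosen cpu, or -1 to keep the thread local.  'self' is the
     * current cpu's group, 'last' the group of the cpu that last ran the
     * thread, 'global_idle' a mask of cpus idling system-wide.
     */
    static int
    pick_migration_cpu(struct group *self, struct group *last,
        unsigned int global_idle, int local_load)
    {
        int cpu;

        /* Over the threshold and somebody is idle: push the thread away. */
        if (self->load > self->ncpus && global_idle != 0) {
            /* First choice: an idle cpu near where the thread last ran. */
            if (global_idle & last->mask) {
                cpu = ffs(last->idlemask);
                if (cpu)
                    return (cpu - 1);
            }
            /* Otherwise any idle cpu. */
            cpu = ffs(global_idle);
            if (cpu)
                return (cpu - 1);
        }
        /* Lightly loaded: only spill to an idle sibling in our own group. */
        if (local_load > 1 && self->idlemask) {
            cpu = ffs(self->idlemask);
            if (cpu)
                return (cpu - 1);
        }
        return (-1);
    }

    int
    main(void)
    {
        struct group self = { 0x3, 0x0, 3, 2 };     /* cpus 0-1, overloaded */
        struct group last = { 0xc, 0x4, 1, 2 };     /* cpus 2-3, cpu 2 idle */

        printf("target cpu: %d\n",
            pick_migration_cpu(&self, &last, 0x4, 3));      /* prints 2 */
        return (0);
    }
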
@@ -958,7 +982,7 @@ sched_slice(struct kse *ke)
/*
* Rationale:
* KSEs in interactive ksegs get the minimum slice so that we
* KSEs in interactive ksegs get a minimal slice so that we
* quickly notice if it abuses its advantage.
*
* KSEs in non-interactive ksegs are assigned a slice that is
@@ -1020,7 +1044,7 @@ sched_interact_update(struct ksegrp *kg)
/*
* If we have exceeded by more than 1/5th then the algorithm below
* will not bring us back into range. Dividing by two here forces
* us into the range of [3/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
* us into the range of [4/5 * SCHED_INTERACT_MAX, SCHED_INTERACT_MAX]
*/
if (sum > (SCHED_SLP_RUN_MAX / 5) * 6) {
kg->kg_runtime /= 2;
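
The comment corrected above belongs to sched_interact_update(), which caps the
combined sleep/run history.  The hunk shows only the halving branch; the toy
model below assumes the clamping that follows in the function rescales by
roughly 4/5 (which is what "exceeded by more than 1/5th" implies) and uses
made-up constants, so it illustrates the shape of the calculation rather than
the kernel's exact arithmetic.

    /* Toy model of the sleep/run history clamping. */
    #include <stdio.h>

    #define SLP_RUN_MAX 1000            /* stand-in for SCHED_SLP_RUN_MAX */

    static void
    interact_update(int *runtime, int *slptime)
    {
        int sum = *runtime + *slptime;

        if (sum < SLP_RUN_MAX)
            return;
        if (sum > (SLP_RUN_MAX / 5) * 6) {      /* more than 1/5 over */
            *runtime /= 2;                      /* halve both at once */
            *slptime /= 2;
            return;
        }
        *runtime = (*runtime / 5) * 4;          /* gentle rescale */
        *slptime = (*slptime / 5) * 4;
    }

    int
    main(void)
    {
        int runtime = 900, slptime = 600;       /* sum 1500 > 1200: halve */

        interact_update(&runtime, &slptime);
        printf("runtime %d slptime %d\n", runtime, slptime);    /* 450 300 */
        return (0);
    }
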
@@ -1144,9 +1168,9 @@ sched_switch(struct thread *td, struct thread *newtd)
* to the new cpu. This is the case in sched_bind().
*/
if ((ke->ke_flags & KEF_ASSIGNED) == 0) {
if (td == PCPU_GET(idlethread))
if (td == PCPU_GET(idlethread)) {
TD_SET_CAN_RUN(td);
else if (TD_IS_RUNNING(td)) {
} else if (TD_IS_RUNNING(td)) {
kseq_load_rem(KSEQ_CPU(ke->ke_cpu), ke);
setrunqueue(td);
} else {
@@ -1162,10 +1186,12 @@ sched_switch(struct thread *td, struct thread *newtd)
kse_reassign(ke);
}
}
if (newtd == NULL)
newtd = choosethread();
else
if (newtd != NULL) {
kseq_load_add(KSEQ_SELF(), newtd->td_kse);
ke->ke_cpu = PCPU_GET(cpuid);
ke->ke_runq = KSEQ_SELF()->ksq_curr;
} else
newtd = choosethread();
if (td != newtd)
cpu_switch(td, newtd);
sched_lock.mtx_lock = (uintptr_t)td;
@@ -1392,11 +1418,18 @@ sched_clock(struct thread *td)
struct kse *ke;
mtx_assert(&sched_lock, MA_OWNED);
kseq = KSEQ_SELF();
#ifdef SMP
if (ticks == bal_tick)
sched_balance();
if (ticks == gbal_tick)
sched_balance_groups();
/*
* We could have been assigned a non real-time thread without an
* IPI.
*/
if (kseq->ksq_assigned)
kseq_assign(kseq); /* Potentially sets NEEDRESCHED */
#endif
/*
* sched_setup() apparently happens prior to stathz being set. We
@@ -1450,7 +1483,6 @@ sched_clock(struct thread *td)
/*
* We're out of time, recompute priorities and requeue.
*/
kseq = KSEQ_SELF();
kseq_load_rem(kseq, ke);
sched_priority(kg);
sched_slice(ke);
@@ -1553,6 +1585,9 @@ sched_add_internal(struct thread *td, int preemptive)
struct kseq *kseq;
struct ksegrp *kg;
struct kse *ke;
#ifdef SMP
int canmigrate;
#endif
int class;
mtx_assert(&sched_lock, MA_OWNED);
@@ -1602,7 +1637,18 @@ sched_add_internal(struct thread *td, int preemptive)
break;
}
#ifdef SMP
if (ke->ke_cpu != PCPU_GET(cpuid)) {
/*
* Don't migrate running threads here. Force the long term balancer
* to do it.
*/
canmigrate = KSE_CAN_MIGRATE(ke, class);
if (TD_IS_RUNNING(td))
canmigrate = 0;
/*
* If this thread is pinned or bound, notify the target cpu.
*/
if (!canmigrate && ke->ke_cpu != PCPU_GET(cpuid)) {
ke->ke_runq = NULL;
kseq_notify(ke, ke->ke_cpu);
return;
@@ -1624,20 +1670,16 @@ sched_add_internal(struct thread *td, int preemptive)
* Now remove ourselves from the group specific idle mask.
*/
kseq->ksq_group->ksg_idlemask &= ~PCPU_GET(cpumask);
} else if (kseq->ksq_load > 1 && KSE_CAN_MIGRATE(ke, class))
} else if (kseq->ksq_load > 1 && canmigrate)
if (kseq_transfer(kseq, ke, class))
return;
ke->ke_cpu = PCPU_GET(cpuid);
#endif
/*
* XXX With preemption this is not necessary.
*/
if (td->td_priority < curthread->td_priority)
curthread->td_flags |= TDF_NEEDRESCHED;
#ifdef SMP
/*
* Only try to preempt if the thread is unpinned or pinned to the
* current CPU.
*/
if (KSE_CAN_MIGRATE(ke, class) || ke->ke_cpu == PCPU_GET(cpuid))
#endif
if (preemptive && maybe_preempt(td))
return;
ke->ke_ksegrp->kg_runq_kses++;
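
Finally, a simplified sketch of the wakeup placement logic in the
sched_add_internal() hunks above (stand-in types and stub helpers, not the
kernel interfaces): a thread that is pinned, bound, or still marked running
must stay with the cpu recorded in ke_cpu, so that cpu is notified instead of
requeueing locally; only a freely migratable thread is offered to an idle cpu,
and otherwise it stays here and its cpu field is updated.

    /* Simplified model of placing a newly runnable thread. */
    #include <stdbool.h>
    #include <stdio.h>

    struct task {
        int cpu;                        /* cpu the task last ran on */
        bool running;                   /* still switching out elsewhere */
        bool pinned;                    /* pinned or bound: may not migrate */
    };

    /* Stubs standing in for kseq_notify() and kseq_transfer(). */
    static void
    notify_cpu(struct task *t, int cpu)
    {
        (void)t;
        printf("notify cpu %d\n", cpu);
    }

    static bool
    try_transfer(struct task *t)
    {
        (void)t;
        return (false);                 /* pretend no idle cpu was found */
    }

    /* Returns true if the task was handed to another cpu. */
    static bool
    wakeup_place(struct task *t, int curcpu, int local_load)
    {
        bool canmigrate;

        /* Running threads are left for the long-term balancer to move. */
        canmigrate = !t->pinned && !t->running;

        /* Must stay put and is owned elsewhere: queue it over there. */
        if (!canmigrate && t->cpu != curcpu) {
            notify_cpu(t, t->cpu);
            return (true);
        }
        /* Busy here and free to move: see if an idle cpu will take it. */
        if (local_load > 1 && canmigrate && try_transfer(t))
            return (true);

        /* Keep it local and remember where it now lives. */
        t->cpu = curcpu;
        return (false);
    }

    int
    main(void)
    {
        struct task t = { 2, true, false };     /* still running on cpu 2 */

        wakeup_place(&t, 0, 3);                 /* prints "notify cpu 2" */
        return (0);
    }
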