 - Now that we have kseq groups, balance them separately.
 - The new sched_balance_groups() function does intra-group balancing while
   sched_balance() balances the available groups.
 - Pick a random time between 0 ticks and hz * 2 ticks to restart each
   balancing process.  Each balancer has its own timeout.
 - Pick a random place in the list of groups to start the search for lowest
   and highest group loads.  This prevents us from preferring a group based on
   numeric position (see the sketch after this list).
 - Use a nasty hack to stop us from preferring cpu 0.  The problem is that
   softclock always runs on cpu 0, so it always has a little extra load.  We
   ignore this load in the balancer for now.  In the future softclock should
   run on a random cpu and these hacks can go away.
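
The randomized scan and the randomized restart interval described in the bullets above are easiest to see in isolation.  What follows is a minimal userland sketch of that pattern, not the kernel code itself: struct group, NGROUPS, HZ, the sample load numbers, and the printf calls are made-up stand-ins for the kernel's kseq_group array, its ksg_load/ksg_transferable fields, hz, and kseq_move().

/*
 * Sketch of the balancer's randomization tricks: scan the group array
 * starting at a random index, and reschedule after a random interval.
 * All names and values here are illustrative stand-ins.
 */
#include <stdio.h>
#include <stdlib.h>

#define	NGROUPS	4
#define	HZ	1000			/* stand-in for the kernel's hz */

struct group {
	int	load;			/* analogous to ksg_load */
	int	transferable;		/* analogous to ksg_transferable */
};

static struct group groups[NGROUPS] = {
	{ 3, 1 }, { 7, 4 }, { 2, 0 }, { 7, 3 }
};

int
main(void)
{
	struct group *high, *low;
	int i, cnt, timo;

	high = low = NULL;
	/*
	 * Start the scan at a random index and wrap around, so that when
	 * several groups tie for the highest or lowest load no group wins
	 * simply because it sits earlier in the array.
	 */
	i = random() % NGROUPS;
	for (cnt = 0; cnt < NGROUPS; cnt++) {
		if ((high == NULL || groups[i].load > high->load) &&
		    groups[i].transferable)
			high = &groups[i];
		if (low == NULL || groups[i].load < low->load)
			low = &groups[i];
		if (++i >= NGROUPS)
			i = 0;
	}
	if (high != NULL && low != NULL && high != low)
		printf("would move threads from group %td to group %td\n",
		    high - groups, low - groups);
	/*
	 * Each balancer reschedules itself after a random interval in
	 * [0, 2 * hz) ticks, so the balancers drift relative to each
	 * other instead of firing in lock step.
	 */
	timo = random() % (HZ * 2);
	printf("next balance in %d ticks\n", timo);
	return (0);
}

The wrap-around index still visits every group exactly once per pass; only the tie-breaking order changes, which is what removes the bias toward low-numbered groups.
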
commit cac77d0422 (parent d7e92f7b85)
Author: Jeff Roberson
Date:   2003-12-12 07:33:51 +00:00


@@ -78,8 +78,9 @@ int realstathz;
 int tickincr = 1;
 
 #ifdef SMP
-/* Callout to handle load balancing SMP systems. */
+/* Callouts to handle load balancing SMP systems. */
 static struct callout kseq_lb_callout;
+static struct callout kseq_group_callout;
 #endif
 
 /*
@@ -234,6 +235,7 @@ struct kseq_group {
	int	ksg_cpumask;		/* Mask of cpus in this group. */
	int	ksg_idlemask;		/* Idle cpus in this group. */
	int	ksg_mask;		/* Bit mask for first cpu. */
+	int	ksg_load;		/* Total load of this group. */
	int	ksg_transferable;	/* Transferable load of this group. */
	LIST_HEAD(, kseq)	ksg_members; /* Linked list of all members. */
 };
@@ -244,10 +246,13 @@ struct kseq_group {
  */
 #ifdef SMP
 static int kseq_idle;
+static int ksg_maxid;
 static struct kseq	kseq_cpu[MAXCPU];
 static struct kseq_group kseq_groups[MAXCPU];
 #define	KSEQ_SELF()	(&kseq_cpu[PCPU_GET(cpuid)])
 #define	KSEQ_CPU(x)	(&kseq_cpu[(x)])
+#define	KSEQ_ID(x)	((x) - kseq_cpu)
+#define	KSEQ_GROUP(x)	(&kseq_groups[(x)])
 #else	/* !SMP */
 static struct kseq	kseq_cpu;
 #define	KSEQ_SELF()	(&kseq_cpu)
@@ -275,6 +280,8 @@ void kseq_print(int cpu);
 static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
 static struct kse *runq_steal(struct runq *rq);
 static void sched_balance(void *arg);
+static void sched_balance_group(struct kseq_group *ksg);
+static void sched_balance_pair(struct kseq *high, struct kseq *low);
 static void kseq_move(struct kseq *from, int cpu);
 static int kseq_idled(struct kseq *kseq);
 static void kseq_notify(struct kse *ke, int cpu);
@@ -340,6 +347,10 @@ kseq_load_add(struct kseq *kseq, struct kse *ke)
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare++;
	kseq->ksq_load++;
+#ifdef SMP
+	if (class != PRI_ITHD)
+		kseq->ksq_group->ksg_load++;
+#endif
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
		CTR6(KTR_ULE,
		    "Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
@@ -357,6 +368,10 @@ kseq_load_rem(struct kseq *kseq, struct kse *ke)
	class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
	if (class == PRI_TIMESHARE)
		kseq->ksq_load_timeshare--;
+#ifdef SMP
+	if (class != PRI_ITHD)
+		kseq->ksq_group->ksg_load--;
+#endif
	kseq->ksq_load--;
	ke->ke_runq = NULL;
	if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
@@ -421,75 +436,128 @@ kseq_nice_rem(struct kseq *kseq, int nice)
 static void
 sched_balance(void *arg)
 {
-	struct kseq *kseq;
-	int transferable;
-	int high_load;
-	int low_load;
-	int high_cpu;
-	int low_cpu;
-	int move;
-	int diff;
+	struct kseq_group *high;
+	struct kseq_group *low;
+	struct kseq_group *ksg;
+	int timo;
+	int cnt;
	int i;
 
-	high_cpu = 0;
-	low_cpu = 0;
-	high_load = 0;
-	low_load = -1;
-
	mtx_lock_spin(&sched_lock);
	if (smp_started == 0)
		goto out;
-	for (i = 0; i <= mp_maxid; i++) {
-		if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
-			continue;
-		kseq = KSEQ_CPU(i);
+	low = high = NULL;
+	i = random() % (ksg_maxid + 1);
+	for (cnt = 0; cnt <= ksg_maxid; cnt++) {
+		ksg = KSEQ_GROUP(i);
		/*
-		 * Find the CPU with the highest load that has some threads
-		 * to transfer.
+		 * Find the CPU with the highest load that has some
+		 * threads to transfer.
		 */
-		if (kseq->ksq_load > high_load &&
-		    kseq->ksq_group->ksg_transferable) {
-			high_load = kseq->ksq_load;
-			high_cpu = i;
-		}
-		if (low_load == -1 || kseq->ksq_load < low_load) {
-			low_load = kseq->ksq_load;
-			low_cpu = i;
-		}
+		if ((high == NULL || ksg->ksg_load > high->ksg_load)
+		    && ksg->ksg_transferable)
+			high = ksg;
+		if (low == NULL || ksg->ksg_load < low->ksg_load)
+			low = ksg;
+		if (++i > ksg_maxid)
+			i = 0;
	}
-	kseq = KSEQ_CPU(high_cpu);
-	/*
-	 * Nothing to do.
-	 */
-	if (low_load >= high_load)
-		goto out;
+	if (low != NULL && high != NULL && high != low)
+		sched_balance_pair(LIST_FIRST(&high->ksg_members),
+		    LIST_FIRST(&low->ksg_members));
+out:
+	mtx_unlock_spin(&sched_lock);
+	timo = random() % (hz * 2);
+	callout_reset(&kseq_lb_callout, timo, sched_balance, NULL);
+}
+
+static void
+sched_balance_groups(void *arg)
+{
+	int timo;
+	int i;
+
+	mtx_lock_spin(&sched_lock);
+	if (smp_started)
+		for (i = 0; i <= ksg_maxid; i++)
+			sched_balance_group(KSEQ_GROUP(i));
+	mtx_unlock_spin(&sched_lock);
+	timo = random() % (hz * 2);
+	callout_reset(&kseq_group_callout, timo, sched_balance_groups, NULL);
+}
+
+static void
+sched_balance_group(struct kseq_group *ksg)
+{
+	struct kseq *kseq;
+	struct kseq *high;
+	struct kseq *low;
+	int load;
+
+	if (ksg->ksg_transferable == 0)
+		return;
+	low = NULL;
+	high = NULL;
+	LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
+		load = kseq->ksq_load;
+		if (kseq == KSEQ_CPU(0))
+			load--;
+		if (high == NULL || load > high->ksq_load)
+			high = kseq;
+		if (low == NULL || load < low->ksq_load)
+			low = kseq;
+	}
+	if (high != NULL && low != NULL && high != low)
+		sched_balance_pair(high, low);
+}
+
+static void
+sched_balance_pair(struct kseq *high, struct kseq *low)
+{
+	int transferable;
+	int high_load;
+	int low_load;
+	int move;
+	int diff;
+	int i;
+
	/*
	 * If we're transfering within a group we have to use this specific
	 * kseq's transferable count, otherwise we can steal from other members
	 * of the group.
	 */
-	if (kseq->ksq_group == KSEQ_CPU(low_cpu)->ksq_group)
-		transferable = kseq->ksq_transferable;
-	else
-		transferable = kseq->ksq_group->ksg_transferable;
+	if (high->ksq_group == low->ksq_group) {
+		transferable = high->ksq_transferable;
+		high_load = high->ksq_load;
+		low_load = low->ksq_load;
+		/*
+		 * XXX If we encounter cpu 0 we must remember to reduce it's
+		 * load by 1 to reflect the swi that is running the callout.
+		 * At some point we should really fix load balancing of the
+		 * swi and then this wont matter.
+		 */
+		if (high == KSEQ_CPU(0))
+			high_load--;
+		if (low == KSEQ_CPU(0))
+			low_load--;
+	} else {
+		transferable = high->ksq_group->ksg_transferable;
+		high_load = high->ksq_group->ksg_load;
+		low_load = low->ksq_group->ksg_load;
+	}
	if (transferable == 0)
-		goto out;
+		return;
	/*
	 * Determine what the imbalance is and then adjust that to how many
	 * kses we actually have to give up (transferable).
	 */
-	diff = kseq->ksq_load - low_load;
+	diff = high_load - low_load;
	move = diff / 2;
	if (diff & 0x1)
		move++;
	move = min(move, transferable);
	for (i = 0; i < move; i++)
-		kseq_move(kseq, low_cpu);
-out:
-	mtx_unlock_spin(&sched_lock);
-	callout_reset(&kseq_lb_callout, hz, sched_balance, NULL);
-
+		kseq_move(high, KSEQ_ID(low));
	return;
 }
@@ -763,6 +831,7 @@ static void
 sched_setup(void *dummy)
 {
 #ifdef SMP
+	int balance_groups;
	int i;
 #endif
 
@@ -770,6 +839,7 @@ sched_setup(void *dummy)
	slice_max = (hz/7);	/* ~140ms */
 
 #ifdef SMP
+	balance_groups = 0;
	/*
	 * Initialize the kseqs.
	 */
@@ -795,6 +865,7 @@ sched_setup(void *dummy)
			ksg->ksg_cpus = 1;
			ksg->ksg_idlemask = 0;
			ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
+			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			LIST_INIT(&ksg->ksg_members);
			LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
@@ -811,6 +882,7 @@ sched_setup(void *dummy)
			 * Initialize the group.
			 */
			ksg->ksg_idlemask = 0;
+			ksg->ksg_load = 0;
			ksg->ksg_transferable = 0;
			ksg->ksg_cpus = cg->cg_count;
			ksg->ksg_cpumask = cg->cg_mask;
@@ -828,10 +900,21 @@ sched_setup(void *dummy)
					    &kseq_cpu[j], ksq_siblings);
				}
			}
+			if (ksg->ksg_cpus > 1)
+				balance_groups = 1;
		}
+		ksg_maxid = smp_topology->ct_count - 1;
	}
	callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
+	callout_init(&kseq_group_callout, CALLOUT_MPSAFE);
	sched_balance(NULL);
+	/*
+	 * Stagger the group and global load balancer so they do not
+	 * interfere with each other.
+	 */
+	if (balance_groups)
+		callout_reset(&kseq_group_callout, hz / 2,
+		    sched_balance_groups, NULL);
 #else
	kseq_setup(KSEQ_SELF());
 #endif