- Now that we have kseq groups, balance them separately.

- The new sched_balance_groups() function does intra-group balancing while
   sched_balance() balances the available groups.
 - Pick a random time between 0 ticks and hz * 2 ticks to restart each
   balancing process.  Each balancer has its own timeout.
 - Pick a random place in the list of groups to start the search for lowest
   and highest group loads.  This prevents us from preferring a group based
   on numeric position (see the sketch after this list).
 - Use a nasty hack to stop us from preferring cpu 0.  The problem is that
   softclock always runs on cpu 0, so it always has a little extra load.  We
   ignore this load in the balancer for now.  In the future softclock should
   run on a random cpu and these hacks can go away.
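
As a rough illustration of the points above, here is a minimal user-space
sketch in plain C.  It is not the kernel code: group_load, find_extremes(),
and balance_once() are invented stand-ins for ksg_load, the random-start scan
in sched_balance(), and the diff/2 move in sched_balance_pair(); the random
re-arm timeout, the clamp to the transferable count, and the cpu 0 softclock
adjustment are only noted in comments.

#include <stdio.h>
#include <stdlib.h>
#include <time.h>

#define NGROUPS 4

/* Invented per-group load counts; the kernel keeps these in ksg_load. */
static int group_load[NGROUPS] = { 3, 7, 2, 5 };

/*
 * Scan every group starting at a random index so that no group is favored
 * by its numeric position, and record the most- and least-loaded groups.
 */
static void
find_extremes(int *highp, int *lowp)
{
        int high, low, i, cnt;

        high = low = -1;
        i = rand() % NGROUPS;
        for (cnt = 0; cnt < NGROUPS; cnt++) {
                if (high == -1 || group_load[i] > group_load[high])
                        high = i;
                if (low == -1 || group_load[i] < group_load[low])
                        low = i;
                if (++i >= NGROUPS)
                        i = 0;
        }
        *highp = high;
        *lowp = low;
}

/*
 * Move half of the load difference, rounded up, from the busiest group to
 * the least busy one; the kernel additionally clamps this to the number of
 * transferable threads (ksg_transferable), which is omitted here.
 */
static void
balance_once(void)
{
        int high, low, diff, move;

        find_extremes(&high, &low);
        if (high == low || group_load[high] <= group_load[low])
                return;
        diff = group_load[high] - group_load[low];
        move = diff / 2 + (diff & 1);
        group_load[high] -= move;
        group_load[low] += move;
        printf("moved %d from group %d to group %d\n", move, high, low);
}

int
main(void)
{
        int i;

        srand((unsigned)time(NULL));
        /*
         * Each kernel balancer re-arms its own callout with a random
         * timeout in [0, hz * 2); this sketch just runs a few passes
         * back to back.
         */
        for (i = 0; i < 3; i++)
                balance_once();
        for (i = 0; i < NGROUPS; i++)
                printf("group %d: load %d\n", i, group_load[i]);
        return (0);
}

A real pass would also subtract one from cpu 0's observed load before
comparing, as the diff below does to account for the softclock swi.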
jeff 2003-12-12 07:33:51 +00:00
parent f1c515afdb
commit 3eb0771fde


@@ -78,8 +78,9 @@ int realstathz;
int tickincr = 1;
#ifdef SMP
/* Callout to handle load balancing SMP systems. */
/* Callouts to handle load balancing SMP systems. */
static struct callout kseq_lb_callout;
static struct callout kseq_group_callout;
#endif
/*
@@ -234,6 +235,7 @@ struct kseq_group {
int ksg_cpumask; /* Mask of cpus in this group. */
int ksg_idlemask; /* Idle cpus in this group. */
int ksg_mask; /* Bit mask for first cpu. */
int ksg_load; /* Total load of this group. */
int ksg_transferable; /* Transferable load of this group. */
LIST_HEAD(, kseq) ksg_members; /* Linked list of all members. */
};
@@ -244,10 +246,13 @@ struct kseq_group {
*/
#ifdef SMP
static int kseq_idle;
static int ksg_maxid;
static struct kseq kseq_cpu[MAXCPU];
static struct kseq_group kseq_groups[MAXCPU];
#define KSEQ_SELF() (&kseq_cpu[PCPU_GET(cpuid)])
#define KSEQ_CPU(x) (&kseq_cpu[(x)])
#define KSEQ_ID(x) ((x) - kseq_cpu)
#define KSEQ_GROUP(x) (&kseq_groups[(x)])
#else /* !SMP */
static struct kseq kseq_cpu;
#define KSEQ_SELF() (&kseq_cpu)
@@ -275,6 +280,8 @@ void kseq_print(int cpu);
static int kseq_transfer(struct kseq *ksq, struct kse *ke, int class);
static struct kse *runq_steal(struct runq *rq);
static void sched_balance(void *arg);
static void sched_balance_group(struct kseq_group *ksg);
static void sched_balance_pair(struct kseq *high, struct kseq *low);
static void kseq_move(struct kseq *from, int cpu);
static int kseq_idled(struct kseq *kseq);
static void kseq_notify(struct kse *ke, int cpu);
@@ -340,6 +347,10 @@ kseq_load_add(struct kseq *kseq, struct kse *ke)
if (class == PRI_TIMESHARE)
kseq->ksq_load_timeshare++;
kseq->ksq_load++;
#ifdef SMP
if (class != PRI_ITHD)
kseq->ksq_group->ksg_load++;
#endif
if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
CTR6(KTR_ULE,
"Add kse %p to %p (slice: %d, pri: %d, nice: %d(%d))",
@@ -357,6 +368,10 @@ kseq_load_rem(struct kseq *kseq, struct kse *ke)
class = PRI_BASE(ke->ke_ksegrp->kg_pri_class);
if (class == PRI_TIMESHARE)
kseq->ksq_load_timeshare--;
#ifdef SMP
if (class != PRI_ITHD)
kseq->ksq_group->ksg_load--;
#endif
kseq->ksq_load--;
ke->ke_runq = NULL;
if (ke->ke_ksegrp->kg_pri_class == PRI_TIMESHARE)
@@ -421,75 +436,128 @@ kseq_nice_rem(struct kseq *kseq, int nice)
static void
sched_balance(void *arg)
{
struct kseq *kseq;
int transferable;
int high_load;
int low_load;
int high_cpu;
int low_cpu;
int move;
int diff;
struct kseq_group *high;
struct kseq_group *low;
struct kseq_group *ksg;
int timo;
int cnt;
int i;
high_cpu = 0;
low_cpu = 0;
high_load = 0;
low_load = -1;
mtx_lock_spin(&sched_lock);
if (smp_started == 0)
goto out;
for (i = 0; i <= mp_maxid; i++) {
if (CPU_ABSENT(i) || (i & stopped_cpus) != 0)
continue;
kseq = KSEQ_CPU(i);
low = high = NULL;
i = random() % (ksg_maxid + 1);
for (cnt = 0; cnt <= ksg_maxid; cnt++) {
ksg = KSEQ_GROUP(i);
/*
* Find the CPU with the highest load that has some threads
* to transfer.
* Find the CPU with the highest load that has some
* threads to transfer.
*/
if (kseq->ksq_load > high_load &&
kseq->ksq_group->ksg_transferable) {
high_load = kseq->ksq_load;
high_cpu = i;
}
if (low_load == -1 || kseq->ksq_load < low_load) {
low_load = kseq->ksq_load;
low_cpu = i;
}
if ((high == NULL || ksg->ksg_load > high->ksg_load)
&& ksg->ksg_transferable)
high = ksg;
if (low == NULL || ksg->ksg_load < low->ksg_load)
low = ksg;
if (++i > ksg_maxid)
i = 0;
}
kseq = KSEQ_CPU(high_cpu);
/*
* Nothing to do.
*/
if (low_load >= high_load)
goto out;
if (low != NULL && high != NULL && high != low)
sched_balance_pair(LIST_FIRST(&high->ksg_members),
LIST_FIRST(&low->ksg_members));
out:
mtx_unlock_spin(&sched_lock);
timo = random() % (hz * 2);
callout_reset(&kseq_lb_callout, timo, sched_balance, NULL);
}
static void
sched_balance_groups(void *arg)
{
int timo;
int i;
mtx_lock_spin(&sched_lock);
if (smp_started)
for (i = 0; i <= ksg_maxid; i++)
sched_balance_group(KSEQ_GROUP(i));
mtx_unlock_spin(&sched_lock);
timo = random() % (hz * 2);
callout_reset(&kseq_group_callout, timo, sched_balance_groups, NULL);
}
static void
sched_balance_group(struct kseq_group *ksg)
{
struct kseq *kseq;
struct kseq *high;
struct kseq *low;
int load;
if (ksg->ksg_transferable == 0)
return;
low = NULL;
high = NULL;
LIST_FOREACH(kseq, &ksg->ksg_members, ksq_siblings) {
load = kseq->ksq_load;
if (kseq == KSEQ_CPU(0))
load--;
if (high == NULL || load > high->ksq_load)
high = kseq;
if (low == NULL || load < low->ksq_load)
low = kseq;
}
if (high != NULL && low != NULL && high != low)
sched_balance_pair(high, low);
}
static void
sched_balance_pair(struct kseq *high, struct kseq *low)
{
int transferable;
int high_load;
int low_load;
int move;
int diff;
int i;
/*
* If we're transferring within a group we have to use this specific
* kseq's transferable count, otherwise we can steal from other members
* of the group.
*/
if (kseq->ksq_group == KSEQ_CPU(low_cpu)->ksq_group)
transferable = kseq->ksq_transferable;
else
transferable = kseq->ksq_group->ksg_transferable;
if (high->ksq_group == low->ksq_group) {
transferable = high->ksq_transferable;
high_load = high->ksq_load;
low_load = low->ksq_load;
/*
* XXX If we encounter cpu 0 we must remember to reduce its
* load by 1 to reflect the swi that is running the callout.
* At some point we should really fix load balancing of the
* swi and then this won't matter.
*/
if (high == KSEQ_CPU(0))
high_load--;
if (low == KSEQ_CPU(0))
low_load--;
} else {
transferable = high->ksq_group->ksg_transferable;
high_load = high->ksq_group->ksg_load;
low_load = low->ksq_group->ksg_load;
}
if (transferable == 0)
goto out;
return;
/*
* Determine what the imbalance is and then adjust that to how many
* kses we actually have to give up (transferable).
*/
diff = kseq->ksq_load - low_load;
diff = high_load - low_load;
move = diff / 2;
if (diff & 0x1)
move++;
move = min(move, transferable);
for (i = 0; i < move; i++)
kseq_move(kseq, low_cpu);
out:
mtx_unlock_spin(&sched_lock);
callout_reset(&kseq_lb_callout, hz, sched_balance, NULL);
kseq_move(high, KSEQ_ID(low));
return;
}
@@ -763,6 +831,7 @@ static void
sched_setup(void *dummy)
{
#ifdef SMP
int balance_groups;
int i;
#endif
@@ -770,6 +839,7 @@ sched_setup(void *dummy)
slice_max = (hz/7); /* ~140ms */
#ifdef SMP
balance_groups = 0;
/*
* Initialize the kseqs.
*/
@@ -795,6 +865,7 @@ sched_setup(void *dummy)
ksg->ksg_cpus = 1;
ksg->ksg_idlemask = 0;
ksg->ksg_cpumask = ksg->ksg_mask = 1 << i;
ksg->ksg_load = 0;
ksg->ksg_transferable = 0;
LIST_INIT(&ksg->ksg_members);
LIST_INSERT_HEAD(&ksg->ksg_members, ksq, ksq_siblings);
@@ -811,6 +882,7 @@ sched_setup(void *dummy)
* Initialize the group.
*/
ksg->ksg_idlemask = 0;
ksg->ksg_load = 0;
ksg->ksg_transferable = 0;
ksg->ksg_cpus = cg->cg_count;
ksg->ksg_cpumask = cg->cg_mask;
@@ -828,10 +900,21 @@ sched_setup(void *dummy)
&kseq_cpu[j], ksq_siblings);
}
}
if (ksg->ksg_cpus > 1)
balance_groups = 1;
}
ksg_maxid = smp_topology->ct_count - 1;
}
callout_init(&kseq_lb_callout, CALLOUT_MPSAFE);
callout_init(&kseq_group_callout, CALLOUT_MPSAFE);
sched_balance(NULL);
/*
* Stagger the group and global load balancer so they do not
* interfere with each other.
*/
if (balance_groups)
callout_reset(&kseq_group_callout, hz / 2,
sched_balance_groups, NULL);
#else
kseq_setup(KSEQ_SELF());
#endif