MFC of r280785, r280871, r280872, r281510, r218511 - callout fixes.

Sponsored by:   Netflix Inc.
This commit is contained in:
rrs 2015-04-17 15:39:42 +00:00
parent 8089a48fdb
commit 5b2ecd3071
5 changed files with 115 additions and 56 deletions

View File

@ -150,9 +150,12 @@ struct callout_cpu {
sbintime_t cc_lastscan;
void *cc_cookie;
u_int cc_bucket;
u_int cc_inited;
char cc_ktr_event_name[20];
};
#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION)
#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
#define cc_exec_next(cc) cc->cc_next
#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel
@ -253,6 +256,7 @@ callout_callwheel_init(void *dummy)
* XXX: Clip callout to result of previous function of maxusers
* maximum 384. This is still huge, but acceptable.
*/
memset(cc_cpu, 0, sizeof(cc_cpu));
ncallout = imin(16 + maxproc + maxfiles, 18508);
TUNABLE_INT_FETCH("kern.ncallout", &ncallout);
@ -288,6 +292,7 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
SLIST_INIT(&cc->cc_callfree);
cc->cc_inited = 1;
cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize,
M_CALLOUT, M_WAITOK);
for (i = 0; i < callwheelsize; i++)
@ -303,7 +308,7 @@ callout_cpu_init(struct callout_cpu *cc, int cpu)
for (i = 0; i < ncallout; i++) {
c = &cc->cc_callout[i];
callout_init(c, 0);
c->c_flags = CALLOUT_LOCAL_ALLOC;
c->c_iflags = CALLOUT_LOCAL_ALLOC;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
}
}
@ -440,7 +445,7 @@ callout_process(sbintime_t now)
* Consumer told us the callout may be run
* directly from hardware interrupt context.
*/
if (tmp->c_flags & CALLOUT_DIRECT) {
if (tmp->c_iflags & CALLOUT_DIRECT) {
#ifdef CALLOUT_PROFILING
++depth_dir;
#endif
@ -460,7 +465,7 @@ callout_process(sbintime_t now)
LIST_REMOVE(tmp, c_links.le);
TAILQ_INSERT_TAIL(&cc->cc_expireq,
tmp, c_links.tqe);
tmp->c_flags |= CALLOUT_PROCESSED;
tmp->c_iflags |= CALLOUT_PROCESSED;
tmp = tmpn;
}
continue;
@ -546,8 +551,11 @@ callout_cc_add(struct callout *c, struct callout_cpu *cc,
if (sbt < cc->cc_lastscan)
sbt = cc->cc_lastscan;
c->c_arg = arg;
c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_flags &= ~CALLOUT_PROCESSED;
c->c_iflags |= CALLOUT_PENDING;
c->c_iflags &= ~CALLOUT_PROCESSED;
c->c_flags |= CALLOUT_ACTIVE;
if (flags & C_DIRECT_EXEC)
c->c_iflags |= CALLOUT_DIRECT;
c->c_func = func;
c->c_time = sbt;
c->c_precision = precision;
@ -577,7 +585,7 @@ static void
callout_cc_del(struct callout *c, struct callout_cpu *cc)
{
if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0)
if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0)
return;
c->c_func = NULL;
SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
@ -596,7 +604,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
struct lock_class *class;
struct lock_object *c_lock;
uintptr_t lock_status;
int c_flags;
int c_iflags;
#ifdef SMP
struct callout_cpu *new_cc;
void (*new_func)(void *);
@ -611,9 +619,10 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
static timeout_t *lastfunc;
#endif
KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
(CALLOUT_PENDING | CALLOUT_ACTIVE),
("softclock_call_cc: pend|act %p %x", c, c->c_flags));
KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING,
("softclock_call_cc: pend %p %x", c, c->c_iflags));
KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE,
("softclock_call_cc: act %p %x", c, c->c_flags));
class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
lock_status = 0;
if (c->c_flags & CALLOUT_SHAREDLOCK) {
@ -625,11 +634,11 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
c_lock = c->c_lock;
c_func = c->c_func;
c_arg = c->c_arg;
c_flags = c->c_flags;
if (c->c_flags & CALLOUT_LOCAL_ALLOC)
c->c_flags = CALLOUT_LOCAL_ALLOC;
c_iflags = c->c_iflags;
if (c->c_iflags & CALLOUT_LOCAL_ALLOC)
c->c_iflags = CALLOUT_LOCAL_ALLOC;
else
c->c_flags &= ~CALLOUT_PENDING;
c->c_iflags &= ~CALLOUT_PENDING;
cc_exec_curr(cc, direct) = c;
cc_exec_cancel(cc, direct) = false;
@ -692,7 +701,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
#endif
KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle");
CTR1(KTR_CALLOUT, "callout %p finished", c);
if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0)
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
@ -712,14 +721,14 @@ skip:
* It should be assert here that the callout is not
* destroyed but that is not easy.
*/
c->c_flags &= ~CALLOUT_DFRMIGRATION;
c->c_iflags &= ~CALLOUT_DFRMIGRATION;
}
cc_exec_waiting(cc, direct) = false;
CC_UNLOCK(cc);
wakeup(&cc_exec_waiting(cc, direct));
CC_LOCK(cc);
} else if (cc_cce_migrating(cc, direct)) {
KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0,
("Migrating legacy callout %p", c));
#ifdef SMP
/*
@ -746,7 +755,7 @@ skip:
callout_cc_del(c, cc);
return;
}
c->c_flags &= ~CALLOUT_DFRMIGRATION;
c->c_iflags &= ~CALLOUT_DFRMIGRATION;
new_cc = callout_cpu_switch(c, cc, new_cpu);
flags = (direct) ? C_DIRECT_EXEC : 0;
@ -762,14 +771,14 @@ skip:
* If the current callout is locally allocated (from
* timeout(9)) then put it on the freelist.
*
* Note: we need to check the cached copy of c_flags because
* Note: we need to check the cached copy of c_iflags because
* if it was not local, then it's not safe to deref the
* callout pointer.
*/
KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
c->c_flags == CALLOUT_LOCAL_ALLOC,
KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 ||
c->c_iflags == CALLOUT_LOCAL_ALLOC,
("corrupted callout"));
if (c_flags & CALLOUT_LOCAL_ALLOC)
if (c_iflags & CALLOUT_LOCAL_ALLOC)
callout_cc_del(c, cc);
}
@ -912,8 +921,16 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
sbintime_t to_sbt, pr;
struct callout_cpu *cc;
int cancelled, direct;
int ignore_cpu=0;
cancelled = 0;
if (cpu == -1) {
ignore_cpu = 1;
} else if ((cpu >= MAXCPU) ||
(cc_cpu[cpu].cc_inited == 0)) {
/* Invalid CPU spec */
panic("Invalid CPU in callout %d", cpu);
}
if (flags & C_ABSOLUTE) {
to_sbt = sbt;
} else {
@ -955,24 +972,29 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
if (pr > precision)
precision = pr;
}
/*
* Don't allow migration of pre-allocated callouts lest they
* become unbalanced.
*/
if (c->c_flags & CALLOUT_LOCAL_ALLOC)
cpu = c->c_cpu;
/*
* This flag used to be added by callout_cc_add, but the
* first time you call this we could end up with the
* wrong direct flag if we don't do it before we add.
*/
if (flags & C_DIRECT_EXEC) {
c->c_flags |= CALLOUT_DIRECT;
direct = 1;
} else {
direct = 0;
}
direct = (c->c_flags & CALLOUT_DIRECT) != 0;
KASSERT(!direct || c->c_lock == NULL,
("%s: direct callout %p has lock", __func__, c));
cc = callout_lock(c);
/*
* Don't allow migration of pre-allocated callouts lest they
* become unbalanced or handle the case where the user does
* not care.
*/
if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) ||
ignore_cpu) {
cpu = c->c_cpu;
}
if (cc_exec_curr(cc, direct) == c) {
/*
* We're being asked to reschedule a callout which is
@ -1012,15 +1034,17 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
}
#endif
}
if (c->c_flags & CALLOUT_PENDING) {
if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
if (c->c_iflags & CALLOUT_PENDING) {
if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
if (cc_exec_next(cc) == c)
cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
LIST_REMOVE(c, c_links.le);
} else
} else {
TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
}
cancelled = 1;
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_iflags &= ~ CALLOUT_PENDING;
c->c_flags &= ~ CALLOUT_ACTIVE;
}
#ifdef SMP
@ -1052,7 +1076,8 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
cc_migration_prec(cc, direct) = precision;
cc_migration_func(cc, direct) = ftn;
cc_migration_arg(cc, direct) = arg;
c->c_flags |= (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING);
c->c_flags |= CALLOUT_ACTIVE;
CTR6(KTR_CALLOUT,
"migration of %p func %p arg %p in %d.%08x to %u deferred",
c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
@ -1112,14 +1137,19 @@ _callout_stop_safe(c, safe)
}
} else
use_lock = 0;
direct = (c->c_flags & CALLOUT_DIRECT) != 0;
if (c->c_iflags & CALLOUT_DIRECT) {
direct = 1;
} else {
direct = 0;
}
sq_locked = 0;
old_cc = NULL;
again:
cc = callout_lock(c);
if ((c->c_flags & (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) ==
(CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) {
if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) ==
(CALLOUT_DFRMIGRATION | CALLOUT_PENDING) &&
((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) {
/*
* Special case where this slipped in while we
* were migrating *as* the callout is about to
@ -1132,7 +1162,8 @@ again:
* on one yet). When the callout wheel runs,
* it will ignore this callout.
*/
c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_ACTIVE);
c->c_iflags &= ~CALLOUT_PENDING;
c->c_flags &= ~CALLOUT_ACTIVE;
not_on_a_list = 1;
} else {
not_on_a_list = 0;
@ -1160,7 +1191,7 @@ again:
* don't attempt to remove it from the queue. We can try to
* stop it by other means however.
*/
if (!(c->c_flags & CALLOUT_PENDING)) {
if (!(c->c_iflags & CALLOUT_PENDING)) {
c->c_flags &= ~CALLOUT_ACTIVE;
/*
@ -1248,6 +1279,16 @@ again:
c, c->c_func, c->c_arg);
KASSERT(!cc_cce_migrating(cc, direct),
("callout wrongly scheduled for migration"));
if (callout_migrating(c)) {
c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
cc_migration_cpu(cc, direct) = CPUBLOCK;
cc_migration_time(cc, direct) = 0;
cc_migration_prec(cc, direct) = 0;
cc_migration_func(cc, direct) = NULL;
cc_migration_arg(cc, direct) = NULL;
#endif
}
CC_UNLOCK(cc);
KASSERT(!sq_locked, ("sleepqueue chain locked"));
return (1);
@ -1260,7 +1301,7 @@ again:
* but we can't stop the one thats running so
* we return 0.
*/
c->c_flags &= ~CALLOUT_DFRMIGRATION;
c->c_iflags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
/*
* We can't call cc_cce_cleanup here since
@ -1289,17 +1330,19 @@ again:
if (sq_locked)
sleepq_release(&cc_exec_waiting(cc, direct));
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
c->c_iflags &= ~CALLOUT_PENDING;
c->c_flags &= ~CALLOUT_ACTIVE;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
if (not_on_a_list == 0) {
if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
if (cc_exec_next(cc) == c)
cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
LIST_REMOVE(c, c_links.le);
} else
} else {
TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
}
}
callout_cc_del(c, cc);
CC_UNLOCK(cc);
@ -1314,10 +1357,10 @@ callout_init(c, mpsafe)
bzero(c, sizeof *c);
if (mpsafe) {
c->c_lock = NULL;
c->c_flags = CALLOUT_RETURNUNLOCKED;
c->c_iflags = CALLOUT_RETURNUNLOCKED;
} else {
c->c_lock = &Giant.lock_object;
c->c_flags = 0;
c->c_iflags = 0;
}
c->c_cpu = timeout_cpu;
}
@ -1337,7 +1380,7 @@ _callout_init_lock(c, lock, flags)
KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
(LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
__func__));
c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
c->c_cpu = timeout_cpu;
}

View File

@ -115,7 +115,7 @@ typedef struct callout sscop_timer_t;
ng_callout(&(S)->t_##T, (S)->aarg, NULL, \
hz * (S)->timer##T / 1000, T##_func, (S), 0); \
} while (0)
#define TIMER_ISACT(S, T) ((S)->t_##T.c_flags & (CALLOUT_PENDING))
#define TIMER_ISACT(S, T) (callout_pending(&(S)->t_##T))
/*
* This assumes, that the user argument is the node pointer.

View File

@ -87,8 +87,8 @@ struct uni_timer {
#define _TIMER_STOP(UNI,FIELD) do { \
ng_uncallout(&FIELD.c, (UNI)->arg); \
} while (0)
#define TIMER_ISACT(UNI,T) ((UNI)->T.c.c_flags & (CALLOUT_ACTIVE | \
CALLOUT_PENDING))
#define TIMER_ISACT(UNI,T) (callout_active(&(UNI)->T.c) || \
callout_pending(&(UNI)->T.c))
#define _TIMER_START(UNI,ARG,FIELD,DUE,FUNC) do { \
_TIMER_STOP(UNI, FIELD); \
ng_callout(&FIELD.c, (UNI)->arg, NULL, \

View File

@ -57,7 +57,8 @@ struct callout {
void *c_arg; /* function argument */
void (*c_func)(void *); /* function to call */
struct lock_object *c_lock; /* lock to handle */
int c_flags; /* state of this entry */
short c_flags; /* User State */
short c_iflags; /* Internal State */
volatile int c_cpu; /* CPU we're scheduled on */
};

View File

@ -63,8 +63,23 @@ struct callout_handle {
};
#ifdef _KERNEL
/*
* Note the flags field is actually *two* fields. The c_flags
* field is the one that caller operations that may, or may not have
* a lock touches i.e. callout_deactivate(). The other, the c_iflags,
* is the internal flags that *must* be kept correct on which the
* callout system depend on e.g. callout_pending().
* The c_iflag is used internally by the callout system to determine which
* list the callout is on and track internal state. Callers *should not*
* use the c_flags field directly but should use the macros provided.
*
* The c_iflags field holds internal flags that are protected by internal
* locks of the callout subsystem. The c_flags field holds external flags.
* The caller must hold its own lock while manipulating or reading external
* flags via callout_active(), callout_deactivate(), callout_reset*(), or
* callout_stop() to avoid races.
*/
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_migrating(c) ((c)->c_flags & CALLOUT_DFRMIGRATION)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
#define callout_drain(c) _callout_stop_safe(c, 1)
void callout_init(struct callout *, int);
@ -78,11 +93,11 @@ void _callout_init_lock(struct callout *, struct lock_object *, int);
#define callout_init_rw(c, rw, flags) \
_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \
NULL, (flags))
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
#define callout_pending(c) ((c)->c_iflags & CALLOUT_PENDING)
int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
void (*)(void *), void *, int, int);
#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, (flags))
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), -1, (flags))
#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), PCPU_GET(cpuid),\
(flags))
@ -90,14 +105,14 @@ int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
callout_reset_sbt_on((c), tick_sbt * (to_ticks), 0, (fn), (arg), \
(cpu), C_HARDCLOCK)
#define callout_reset(c, on_tick, fn, arg) \
callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu)
callout_reset_on((c), (on_tick), (fn), (arg), -1)
#define callout_reset_curcpu(c, on_tick, fn, arg) \
callout_reset_on((c), (on_tick), (fn), (arg), PCPU_GET(cpuid))
#define callout_schedule_sbt_on(c, sbt, pr, cpu, flags) \
callout_reset_sbt_on((c), (sbt), (pr), (c)->c_func, (c)->c_arg, \
(cpu), (flags))
#define callout_schedule_sbt(c, sbt, pr, flags) \
callout_schedule_sbt_on((c), (sbt), (pr), (c)->c_cpu, (flags))
callout_schedule_sbt_on((c), (sbt), (pr), -1, (flags))
#define callout_schedule_sbt_curcpu(c, sbt, pr, flags) \
callout_schedule_sbt_on((c), (sbt), (pr), PCPU_GET(cpuid), (flags))
int callout_schedule(struct callout *, int);