This fixes two conditions that can occur when migration is being done
in the callout code, and harmonizes the macro use:
1) callout_active() will lie. If a migration is occurring while the
   callout is about to expire and the migration has been deferred,
   callout_active() will no longer return true until after the
   migration completes. This confuses and breaks callers that use
   callout_init(&c, 1), such as TCP (see the sketch after this list).
2) The migration code had a bug where, while migrating, if two calls
   to callout_reset came in and both collided with the callout on the
   wheel that was about to run, the second callout_reset would corrupt
   the list the callout wheel uses, putting the callout thread into an
   endless loop.
3) Per imp, I have fixed the macro usage throughout the code; the
   accessor macros were for the most part being ignored.
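
For context, here is a minimal sketch of the self-rescheduling handler
pattern documented in timeout(9) that condition 1 breaks. It is not
part of this commit; struct example_softc, example_timer and the ex_*
names are hypothetical. Such a callout is set up with
callout_init(&c, 1) and the handler relies on callout_active() staying
true while a reset or a deferred migration is outstanding:

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct example_softc {
	struct mtx	ex_mtx;		/* protects the callout state */
	struct callout	ex_callout;	/* set up with callout_init(&ex_callout, 1) */
};

static void
example_timer(void *arg)
{
	struct example_softc *sc = arg;

	mtx_lock(&sc->ex_mtx);
	if (callout_pending(&sc->ex_callout)) {
		/* A racing callout_reset() already rearmed us; bail. */
		mtx_unlock(&sc->ex_mtx);
		return;
	}
	if (!callout_active(&sc->ex_callout)) {
		/*
		 * A racing callout_stop() cancelled us.  With condition 1,
		 * this test could also fail spuriously while a migration
		 * was deferred, so the timer silently stopped rearming.
		 */
		mtx_unlock(&sc->ex_mtx);
		return;
	}
	callout_deactivate(&sc->ex_callout);
	/* ... periodic work goes here ... */
	callout_reset(&sc->ex_callout, hz, example_timer, sc);
	mtx_unlock(&sc->ex_mtx);
}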

Phabricator D1711; also looked at by lstewart, jhb, and sbruno.
Reviewed by:	kostikbel, imp, adrian, hselasky
MFC after:	3 days
Sponsored by:	Netflix Inc.
Author:	rrs
Date:	2015-02-09 19:19:44 +00:00
Commit:	344ecf88af
Parent:	9fc7114330

2 changed files with 155 additions and 86 deletions

sys/kern/kern_timeout.c

@@ -166,26 +166,16 @@ struct callout_cpu {
char cc_ktr_event_name[20];
};
#define cc_exec_curr cc_exec_entity[0].cc_curr
#define cc_exec_next cc_exec_entity[0].cc_next
#define cc_exec_cancel cc_exec_entity[0].cc_cancel
#define cc_exec_waiting cc_exec_entity[0].cc_waiting
#define cc_exec_curr_dir cc_exec_entity[1].cc_curr
#define cc_exec_next_dir cc_exec_entity[1].cc_next
#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel
#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting
#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr
#define cc_exec_next(cc, dir) cc->cc_exec_entity[dir].cc_next
#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel
#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting
#ifdef SMP
#define cc_migration_func cc_exec_entity[0].ce_migration_func
#define cc_migration_arg cc_exec_entity[0].ce_migration_arg
#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu
#define cc_migration_time cc_exec_entity[0].ce_migration_time
#define cc_migration_prec cc_exec_entity[0].ce_migration_prec
#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func
#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg
#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu
#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time
#define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec
#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func
#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg
#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu
#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time
#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec
struct callout_cpu cc_cpu[MAXCPU];
#define CPUBLOCK MAXCPU
@@ -235,16 +225,16 @@ static void
cc_cce_cleanup(struct callout_cpu *cc, int direct)
{
cc->cc_exec_entity[direct].cc_curr = NULL;
cc->cc_exec_entity[direct].cc_next = NULL;
cc->cc_exec_entity[direct].cc_cancel = false;
cc->cc_exec_entity[direct].cc_waiting = false;
cc_exec_curr(cc, direct) = NULL;
cc_exec_next(cc, direct) = NULL;
cc_exec_cancel(cc, direct) = false;
cc_exec_waiting(cc, direct) = false;
#ifdef SMP
cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK;
cc->cc_exec_entity[direct].ce_migration_time = 0;
cc->cc_exec_entity[direct].ce_migration_prec = 0;
cc->cc_exec_entity[direct].ce_migration_func = NULL;
cc->cc_exec_entity[direct].ce_migration_arg = NULL;
cc_migration_cpu(cc, direct) = CPUBLOCK;
cc_migration_time(cc, direct) = 0;
cc_migration_prec(cc, direct) = 0;
cc_migration_func(cc, direct) = NULL;
cc_migration_arg(cc, direct) = NULL;
#endif
}
@@ -256,7 +246,7 @@ cc_cce_migrating(struct callout_cpu *cc, int direct)
{
#ifdef SMP
return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK);
return (cc_migration_cpu(cc, direct) != CPUBLOCK);
#else
return (0);
#endif
@@ -492,7 +482,7 @@ callout_process(sbintime_t now)
#ifdef CALLOUT_PROFILING
++depth_dir;
#endif
cc->cc_exec_next_dir =
cc_exec_next(cc, 1) =
LIST_NEXT(tmp, c_links.le);
cc->cc_bucket = firstb & callwheelmask;
LIST_REMOVE(tmp, c_links.le);
@@ -501,7 +491,7 @@ callout_process(sbintime_t now)
&mpcalls_dir, &lockcalls_dir, NULL,
#endif
1);
tmp = cc->cc_exec_next_dir;
tmp = cc_exec_next(cc, 1);
} else {
tmpn = LIST_NEXT(tmp, c_links.le);
LIST_REMOVE(tmp, c_links.le);
@@ -585,7 +575,7 @@ callout_lock(struct callout *c)
static void
callout_cc_add(struct callout *c, struct callout_cpu *cc,
sbintime_t sbt, sbintime_t precision, void (*func)(void *),
void *arg, int cpu, int flags)
void *arg, int cpu, int flags, int direct)
{
int bucket;
@@ -606,7 +596,7 @@ callout_cc_add(struct callout *c, struct callout_cpu *cc,
(u_int)(c->c_precision & 0xffffffff));
LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
if (cc->cc_bucket == bucket)
cc->cc_exec_next_dir = c;
cc_exec_next(cc, direct) = c;
#ifndef NO_EVENTTIMERS
/*
* Inform the eventtimers(4) subsystem there's a new callout
@@ -679,8 +669,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
c->c_flags = CALLOUT_LOCAL_ALLOC;
else
c->c_flags &= ~CALLOUT_PENDING;
cc->cc_exec_entity[direct].cc_curr = c;
cc->cc_exec_entity[direct].cc_cancel = false;
cc_exec_curr(cc, direct) = c;
cc_exec_cancel(cc, direct) = false;
CC_UNLOCK(cc);
if (c_lock != NULL) {
class->lc_lock(c_lock, lock_status);
@@ -688,12 +679,12 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
* The callout may have been cancelled
* while we switched locks.
*/
if (cc->cc_exec_entity[direct].cc_cancel) {
if (cc_exec_cancel(cc, direct)) {
class->lc_unlock(c_lock);
goto skip;
}
/* The callout cannot be stopped now. */
cc->cc_exec_entity[direct].cc_cancel = true;
cc_exec_cancel(cc, direct) = true;
if (c_lock == &Giant.lock_object) {
#ifdef CALLOUT_PROFILING
(*gcalls)++;
@@ -744,9 +735,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr"));
cc->cc_exec_entity[direct].cc_curr = NULL;
if (cc->cc_exec_entity[direct].cc_waiting) {
KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr"));
cc_exec_curr(cc, direct) = NULL;
if (cc_exec_waiting(cc, direct)) {
/*
* There is someone waiting for the
* callout to complete.
@@ -762,9 +753,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
*/
c->c_flags &= ~CALLOUT_DFRMIGRATION;
}
cc->cc_exec_entity[direct].cc_waiting = false;
cc_exec_waiting(cc, direct) = false;
CC_UNLOCK(cc);
wakeup(&cc->cc_exec_entity[direct].cc_waiting);
wakeup(&cc_exec_waiting(cc, direct));
CC_LOCK(cc);
} else if (cc_cce_migrating(cc, direct)) {
KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
@@ -774,11 +765,11 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
* If the callout was scheduled for
* migration just perform it now.
*/
new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu;
new_time = cc->cc_exec_entity[direct].ce_migration_time;
new_prec = cc->cc_exec_entity[direct].ce_migration_prec;
new_func = cc->cc_exec_entity[direct].ce_migration_func;
new_arg = cc->cc_exec_entity[direct].ce_migration_arg;
new_cpu = cc_migration_cpu(cc, direct);
new_time = cc_migration_time(cc, direct);
new_prec = cc_migration_prec(cc, direct);
new_func = cc_migration_func(cc, direct);
new_arg = cc_migration_arg(cc, direct);
cc_cce_cleanup(cc, direct);
/*
@@ -787,7 +778,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
*
* As first thing, handle deferred callout stops.
*/
if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
if (!callout_migrating(c)) {
CTR3(KTR_CALLOUT,
"deferred cancelled %p func %p arg %p",
c, new_func, new_arg);
@@ -799,7 +790,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc,
new_cc = callout_cpu_switch(c, cc, new_cpu);
flags = (direct) ? C_DIRECT_EXEC : 0;
callout_cc_add(c, new_cc, new_time, new_prec, new_func,
new_arg, new_cpu, flags);
new_arg, new_cpu, flags, direct);
CC_UNLOCK(new_cc);
CC_LOCK(cc);
#else
@@ -1007,15 +998,15 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
KASSERT(!direct || c->c_lock == NULL,
("%s: direct callout %p has lock", __func__, c));
cc = callout_lock(c);
if (cc->cc_exec_entity[direct].cc_curr == c) {
if (cc_exec_curr(cc, direct) == c) {
/*
* We're being asked to reschedule a callout which is
* currently in progress. If there is a lock then we
* can cancel the callout if it has not really started.
*/
if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel)
cancelled = cc->cc_exec_entity[direct].cc_cancel = true;
if (cc->cc_exec_entity[direct].cc_waiting) {
if (c->c_lock != NULL && cc_exec_cancel(cc, direct))
cancelled = cc_exec_cancel(cc, direct) = true;
if (cc_exec_waiting(cc, direct)) {
/*
* Someone has called callout_drain to kill this
* callout. Don't reschedule.
@@ -1026,11 +1017,30 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
CC_UNLOCK(cc);
return (cancelled);
}
#ifdef SMP
if (callout_migrating(c)) {
/*
* This only occurs when a second callout_reset_sbt_on
* is made after a previous one moved it into
* deferred migration (below). Note we do *not* change
* the prev_cpu even though the previous target may
* be different.
*/
cc_migration_cpu(cc, direct) = cpu;
cc_migration_time(cc, direct) = to_sbt;
cc_migration_prec(cc, direct) = precision;
cc_migration_func(cc, direct) = ftn;
cc_migration_arg(cc, direct) = arg;
cancelled = 1;
CC_UNLOCK(cc);
return (cancelled);
}
#endif
}
if (c->c_flags & CALLOUT_PENDING) {
if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
if (cc->cc_exec_next_dir == c)
cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
if (cc_exec_next(cc, direct) == c)
cc_exec_next(cc, direct) = LIST_NEXT(c, c_links.le);
LIST_REMOVE(c, c_links.le);
} else
TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
@@ -1045,15 +1055,29 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
* to a more appropriate moment.
*/
if (c->c_cpu != cpu) {
if (cc->cc_exec_entity[direct].cc_curr == c) {
cc->cc_exec_entity[direct].ce_migration_cpu = cpu;
cc->cc_exec_entity[direct].ce_migration_time
= to_sbt;
cc->cc_exec_entity[direct].ce_migration_prec
= precision;
cc->cc_exec_entity[direct].ce_migration_func = ftn;
cc->cc_exec_entity[direct].ce_migration_arg = arg;
c->c_flags |= CALLOUT_DFRMIGRATION;
if (cc_exec_curr(cc, direct) == c) {
/*
* Pending will have been removed since we are
* actually executing the callout on another
* CPU. That callout should be waiting on the
* lock the caller holds. If we set both
* active/and/pending after we return and the
* lock on the executing callout proceeds, it
* will then see pending is true and return.
* At the return from the actual callout execution
* the migration will occur in softclock_call_cc
* and this new callout will be placed on the
* new CPU via a call to callout_cpu_switch() which
* will get the lock on the right CPU followed
* by a call callout_cc_add() which will add it there.
* (see above in softclock_call_cc()).
*/
cc_migration_cpu(cc, direct) = cpu;
cc_migration_time(cc, direct) = to_sbt;
cc_migration_prec(cc, direct) = precision;
cc_migration_func(cc, direct) = ftn;
cc_migration_arg(cc, direct) = arg;
c->c_flags |= (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING);
CTR6(KTR_CALLOUT,
"migration of %p func %p arg %p in %d.%08x to %u deferred",
c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
@@ -1065,7 +1089,7 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
}
#endif
callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags, direct);
CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
(u_int)(to_sbt & 0xffffffff));
@@ -1095,6 +1119,7 @@ _callout_stop_safe(struct callout *c, int safe)
struct callout_cpu *cc, *old_cc;
struct lock_class *class;
int direct, sq_locked, use_lock;
int not_on_a_list;
if (safe)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
@@ -1120,6 +1145,26 @@ _callout_stop_safe(struct callout *c, int safe)
again:
cc = callout_lock(c);
if ((c->c_flags & (CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) ==
(CALLOUT_DFRMIGRATION | CALLOUT_ACTIVE | CALLOUT_PENDING)) {
/*
* Special case where this slipped in while we
* were migrating *as* the callout is about to
* execute. The caller probably holds the lock
* the callout wants.
*
* Get rid of the migration first. Then set
* the flag that tells this code *not* to
* try to remove it from any lists (its not
* on one yet). When the callout wheel runs,
* it will ignore this callout.
*/
c->c_flags &= ~(CALLOUT_PENDING|CALLOUT_ACTIVE);
not_on_a_list = 1;
} else {
not_on_a_list = 0;
}
/*
* If the callout was migrating while the callout cpu lock was
* dropped, just drop the sleepqueue lock and check the states
@@ -1128,7 +1173,7 @@ _callout_stop_safe(struct callout *c, int safe)
if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
CC_UNLOCK(cc);
sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting);
sleepq_release(&cc_exec_waiting(old_cc, direct));
sq_locked = 0;
old_cc = NULL;
goto again;
@@ -1149,13 +1194,12 @@ _callout_stop_safe(struct callout *c, int safe)
* If it wasn't on the queue and it isn't the current
* callout, then we can't stop it, so just bail.
*/
if (cc->cc_exec_entity[direct].cc_curr != c) {
if (cc_exec_curr(cc, direct) != c) {
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
if (sq_locked)
sleepq_release(
&cc->cc_exec_entity[direct].cc_waiting);
sleepq_release(&cc_exec_waiting(cc, direct));
return (0);
}
@@ -1166,7 +1210,7 @@ _callout_stop_safe(struct callout *c, int safe)
* just wait for the current invocation to
* finish.
*/
while (cc->cc_exec_entity[direct].cc_curr == c) {
while (cc_exec_curr(cc, direct) == c) {
/*
* Use direct calls to sleepqueue interface
* instead of cv/msleep in order to avoid
@@ -1187,7 +1231,7 @@ _callout_stop_safe(struct callout *c, int safe)
if (!sq_locked) {
CC_UNLOCK(cc);
sleepq_lock(
&cc->cc_exec_entity[direct].cc_waiting);
&cc_exec_waiting(cc, direct));
sq_locked = 1;
old_cc = cc;
goto again;
@@ -1199,15 +1243,15 @@ _callout_stop_safe(struct callout *c, int safe)
* will be packed up, just let softclock()
* take care of it.
*/
cc->cc_exec_entity[direct].cc_waiting = true;
cc_exec_waiting(cc, direct) = true;
DROP_GIANT();
CC_UNLOCK(cc);
sleepq_add(
&cc->cc_exec_entity[direct].cc_waiting,
&cc_exec_waiting(cc, direct),
&cc->cc_lock.lock_object, "codrain",
SLEEPQ_SLEEP, 0);
sleepq_wait(
&cc->cc_exec_entity[direct].cc_waiting,
&cc_exec_waiting(cc, direct),
0);
sq_locked = 0;
old_cc = NULL;
@@ -1217,7 +1261,8 @@ _callout_stop_safe(struct callout *c, int safe)
CC_LOCK(cc);
}
} else if (use_lock &&
!cc->cc_exec_entity[direct].cc_cancel) {
!cc_exec_cancel(cc, direct)) {
/*
* The current callout is waiting for its
* lock which we hold. Cancel the callout
@@ -1225,7 +1270,7 @@ _callout_stop_safe(struct callout *c, int safe)
* lock, the callout will be skipped in
* softclock().
*/
cc->cc_exec_entity[direct].cc_cancel = true;
cc_exec_cancel(cc, direct) = true;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
KASSERT(!cc_cce_migrating(cc, direct),
@@ -1233,12 +1278,34 @@ _callout_stop_safe(struct callout *c, int safe)
CC_UNLOCK(cc);
KASSERT(!sq_locked, ("sleepqueue chain locked"));
return (1);
} else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
} else if (callout_migrating(c)) {
/*
* The callout is currently being serviced
* and the "next" callout is scheduled at
* its completion with a migration. We remove
* the migration flag so it *won't* get rescheduled,
* but we can't stop the one thats running so
* we return 0.
*/
c->c_flags &= ~CALLOUT_DFRMIGRATION;
#ifdef SMP
/*
* We can't call cc_cce_cleanup here since
* if we do it will remove .ce_curr and
* its still running. This will prevent a
* reschedule of the callout when the
* execution completes.
*/
cc_migration_cpu(cc, direct) = CPUBLOCK;
cc_migration_time(cc, direct) = 0;
cc_migration_prec(cc, direct) = 0;
cc_migration_func(cc, direct) = NULL;
cc_migration_arg(cc, direct) = NULL;
#endif
CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
return (1);
return (0);
}
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
@@ -1247,20 +1314,21 @@ _callout_stop_safe(struct callout *c, int safe)
return (0);
}
if (sq_locked)
sleepq_release(&cc->cc_exec_entity[direct].cc_waiting);
sleepq_release(&cc_exec_waiting(cc, direct));
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
if (cc->cc_exec_next_dir == c)
cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
LIST_REMOVE(c, c_links.le);
} else
TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
if (not_on_a_list == 0) {
if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
if (cc_exec_next(cc, direct) == c)
cc_exec_next(cc, direct) = LIST_NEXT(c, c_links.le);
LIST_REMOVE(c, c_links.le);
} else
TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
}
callout_cc_del(c, cc);
CC_UNLOCK(cc);
return (1);
}

sys/sys/callout.h

@@ -64,6 +64,7 @@ struct callout_handle {
#ifdef _KERNEL
#define callout_active(c) ((c)->c_flags & CALLOUT_ACTIVE)
#define callout_migrating(c) ((c)->c_flags & CALLOUT_DFRMIGRATION)
#define callout_deactivate(c) ((c)->c_flags &= ~CALLOUT_ACTIVE)
#define callout_drain(c) _callout_stop_safe(c, 1)
void callout_init(struct callout *, int);
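
The callout_migrating() macro added above reports whether a callout is
parked in the deferred-migration state, i.e. it will be rearmed on its
new CPU once the currently running instance finishes. A hypothetical
illustration of a consumer-side check, not taken from this commit
(example_timer_armed is an invented helper; the caller is assumed to
hold the lock that serializes the callout):

#include <sys/param.h>
#include <sys/callout.h>

/*
 * Treat a callout as still armed if it is either queued on a wheel or
 * expiration queue (PENDING) or waiting out a deferred migration.
 */
static int
example_timer_armed(struct callout *c)
{

	return (callout_pending(c) || callout_migrating(c));
}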